JAVA-6390: Move kafka articles from libraries-data to new module

apache-kafka
2021-08-15 17:15:36 +05:30 · 2021-08-15 17:15:36 +05:30 · c0cae35714
commit c0cae35714
parent 2caf4b52f3
13 changed files with 279 additions and 62 deletions
--- a/libraries-data/src/main/resources/kafka-connect/01_Quick_Start/connect-file-sink.properties
+++ b/libraries-data/src/main/resources/kafka-connect/01_Quick_Start/connect-file-sink.properties
--- a/libraries-data/src/main/resources/kafka-connect/01_Quick_Start/connect-file-source.properties
+++ b/libraries-data/src/main/resources/kafka-connect/01_Quick_Start/connect-file-source.properties
--- a/libraries-data/src/main/resources/kafka-connect/01_Quick_Start/connect-standalone.properties
+++ b/libraries-data/src/main/resources/kafka-connect/01_Quick_Start/connect-standalone.properties
--- a/libraries-data/src/main/resources/kafka-connect/02_Distributed/connect-distributed.properties
+++ b/libraries-data/src/main/resources/kafka-connect/02_Distributed/connect-distributed.properties
--- a/libraries-data/src/main/resources/kafka-connect/02_Distributed/connect-file-sink.json
+++ b/libraries-data/src/main/resources/kafka-connect/02_Distributed/connect-file-sink.json
--- a/libraries-data/src/main/resources/kafka-connect/02_Distributed/connect-file-source.json
+++ b/libraries-data/src/main/resources/kafka-connect/02_Distributed/connect-file-source.json
--- a/libraries-data/src/main/resources/kafka-connect/03_Transform/connect-distributed.properties
+++ b/libraries-data/src/main/resources/kafka-connect/03_Transform/connect-distributed.properties
--- a/libraries-data/src/main/resources/kafka-connect/03_Transform/connect-file-source-transform.json
+++ b/libraries-data/src/main/resources/kafka-connect/03_Transform/connect-file-source-transform.json
--- a/libraries-data/src/main/resources/kafka-connect/04_Custom/connect-mongodb-sink.json
+++ b/libraries-data/src/main/resources/kafka-connect/04_Custom/connect-mongodb-sink.json
--- a/libraries-data/src/main/resources/kafka-connect/04_Custom/connect-mqtt-source.json
+++ b/libraries-data/src/main/resources/kafka-connect/04_Custom/connect-mqtt-source.json
--- a/libraries-data/src/main/resources/kafka-connect/04_Custom/docker-compose.yaml
+++ b/libraries-data/src/main/resources/kafka-connect/04_Custom/docker-compose.yaml
--- a/apache-kafka/src/test/java/com/baeldung/kafka/streamsvsconsumer/KafkaStreamsLiveTest.java
+++ b/apache-kafka/src/test/java/com/baeldung/kafka/streamsvsconsumer/KafkaStreamsLiveTest.java
@ -0,0 +1,279 @@
+package com.baeldung.kafka.streamsvsconsumer;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StoreQueryParameters;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.Topology;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.Grouped;
+import org.apache.kafka.streams.kstream.JoinWindows;
+import org.apache.kafka.streams.kstream.KGroupedStream;
+import org.apache.kafka.streams.kstream.KGroupedTable;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.TimeWindows;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.streams.state.QueryableStoreTypes;
+import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
+import org.apache.kafka.streams.state.StoreBuilder;
+import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.WindowStore;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.testcontainers.containers.KafkaContainer;
+import org.testcontainers.utility.DockerImageName;
+
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.Properties;
+
+import static org.apache.kafka.clients.consumer.ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG;
+
+public class KafkaStreamsLiveTest {
+    private final String LEFT_TOPIC = "left-stream-topic";
+    private final String RIGHT_TOPIC = "right-stream-topic";
+    private final String LEFT_RIGHT_TOPIC = "left-right-stream-topic";
+
+    private KafkaProducer<String, String> producer = createKafkaProducer();
+    private Properties streamsConfiguration = new Properties();
+
+    static final String TEXT_LINES_TOPIC = "TextLinesTopic";
+
+    private final String TEXT_EXAMPLE_1 = "test test and test";
+    private final String TEXT_EXAMPLE_2 = "test filter filter this sentence";
+
+    @ClassRule
+    public static KafkaContainer kafka = new KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:5.4.3"));
+
+    @Before
+    public void setUp() {
+        streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka.getBootstrapServers());
+        streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+        streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+        streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
+        streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+    }
+
+    @Test
+    public void shouldTestKafkaTableLatestWord() throws InterruptedException {
+        String inputTopic = "topicTable";
+
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        KTable<String, String> textLinesTable = builder.table(inputTopic,
+            Consumed.with(Serdes.String(), Serdes.String()));
+
+        textLinesTable.toStream().foreach((word, count) -> System.out.println("Latest word: " + word + " -> " + count));
+
+        final Topology topology = builder.build();
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "latest-word-id");
+        KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
+
+        streams.cleanUp();
+        streams.start();
+        producer.send(new ProducerRecord<String, String>(inputTopic, "1", TEXT_EXAMPLE_1));
+        producer.send(new ProducerRecord<String, String>(inputTopic, "2", TEXT_EXAMPLE_2));
+
+        Thread.sleep(2000);
+        streams.close();
+    }
+
+    @Test
+    public void shouldTestWordCountKafkaStreams() throws InterruptedException {
+        String wordCountTopic = "wordCountTopic";
+
+        final StreamsBuilder builder = new StreamsBuilder();
+        KStream<String, String> textLines = builder.stream(wordCountTopic,
+          Consumed.with(Serdes.String(), Serdes.String()));
+
+        KTable<String, Long> wordCounts = textLines
+          .flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.ROOT)
+            .split("\\W+")))
+          .groupBy((key, word) -> word)
+          .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>> as("counts-store"));
+
+        wordCounts.toStream().foreach((word, count) -> System.out.println("Word: " + word + " -> " + count));
+
+        wordCounts.toStream().to("outputTopic",
+          Produced.with(Serdes.String(), Serdes.Long()));
+
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-stream-table-id");
+        final Topology topology = builder.build();
+        KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
+
+        streams.cleanUp();
+        streams.start();
+
+        producer.send(new ProducerRecord<String, String>(wordCountTopic, "1", TEXT_EXAMPLE_1));
+        producer.send(new ProducerRecord<String, String>(wordCountTopic, "2", TEXT_EXAMPLE_2));
+
+        Thread.sleep(2000);
+        streams.close();
+    }
+
+    // Filter, map
+    @Test
+    public void shouldTestStatelessTransformations() throws InterruptedException {
+        String wordCountTopic = "wordCountTopic";
+
+        //when
+        final StreamsBuilder builder = new StreamsBuilder();
+        KStream<String, String> textLines = builder.stream(wordCountTopic,
+          Consumed.with(Serdes.String(), Serdes.String()));
+
+        final KStream<String, String> textLinesUpperCase =
+          textLines
+            .map((key, value) -> KeyValue.pair(value, value.toUpperCase()))
+            .filter((key, value) -> value.contains("FILTER"));
+
+        KTable<String, Long> wordCounts = textLinesUpperCase
+          .flatMapValues(value -> Arrays.asList(value.split("\\W+")))
+          .groupBy((key, word) -> word)
+          .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>> as("counts-store"));
+
+        wordCounts.toStream().foreach((word, count) -> System.out.println("Word: " + word + " -> " + count));
+
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-filter-map-id");
+        final Topology topology = builder.build();
+        KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
+
+        streams.cleanUp();
+        streams.start();
+
+        producer.send(new ProducerRecord<String, String>(wordCountTopic, "1", TEXT_EXAMPLE_1));
+        producer.send(new ProducerRecord<String, String>(wordCountTopic, "2", TEXT_EXAMPLE_2));
+
+        Thread.sleep(2000);
+        streams.close();
+
+    }
+
+    @Test
+    public void shouldTestAggregationStatefulTransformations() throws InterruptedException {
+        String aggregationTopic = "aggregationTopic";
+
+        final StreamsBuilder builder = new StreamsBuilder();
+        final KStream<byte[], String> input = builder.stream(aggregationTopic,
+          Consumed.with(Serdes.ByteArray(), Serdes.String()));
+        final KTable<String, Long> aggregated = input
+          .groupBy((key, value) -> (value != null && value.length() > 0) ? value.substring(0, 2).toLowerCase() : "",
+            Grouped.with(Serdes.String(), Serdes.String()))
+          .aggregate(() -> 0L, (aggKey, newValue, aggValue) -> aggValue + newValue.length(),
+            Materialized.with(Serdes.String(), Serdes.Long()));
+
+        aggregated.toStream().foreach((word, count) -> System.out.println("Word: " + word + " -> " + count));
+
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "aggregation-id");
+        final Topology topology = builder.build();
+        KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
+
+        streams.cleanUp();
+        streams.start();
+
+        producer.send(new ProducerRecord<String, String>(aggregationTopic, "1", "one"));
+        producer.send(new ProducerRecord<String, String>(aggregationTopic, "2", "two"));
+        producer.send(new ProducerRecord<String, String>(aggregationTopic, "3", "three"));
+        producer.send(new ProducerRecord<String, String>(aggregationTopic, "4", "four"));
+        producer.send(new ProducerRecord<String, String>(aggregationTopic, "5", "five"));
+
+        Thread.sleep(5000);
+        streams.close();
+
+    }
+
+    @Test
+    public void shouldTestWindowingJoinStatefulTransformations() throws InterruptedException {
+        final StreamsBuilder builder = new StreamsBuilder();
+
+       KStream<String, String> leftSource = builder.stream(LEFT_TOPIC);
+       KStream<String, String> rightSource = builder.stream(RIGHT_TOPIC);
+
+        KStream<String, String> leftRightSource = leftSource.outerJoin(rightSource,
+         (leftValue, rightValue) -> "left=" + leftValue + ", right=" + rightValue,
+         JoinWindows.of(Duration.ofSeconds(5)))
+           .groupByKey()
+           .reduce(((key, lastValue) -> lastValue))
+           .toStream();
+
+        leftRightSource.foreach((key, value) -> System.out.println("(key= " + key + ") -> (" + value + ")"));
+
+        final Topology topology = builder.build();
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "windowing-join-id");
+        KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration);
+
+        streams.cleanUp();
+        streams.start();
+
+        producer.send(new ProducerRecord<String, String>(LEFT_TOPIC, "1", "left"));
+        producer.send(new ProducerRecord<String, String>(RIGHT_TOPIC, "2", "right"));
+
+        Thread.sleep(2000);
+        streams.close();
+    }
+
+    @Test
+    public void shouldTestWordCountWithInteractiveQueries() throws InterruptedException {
+
+        final Serde<String> stringSerde = Serdes.String();
+        final StreamsBuilder builder = new StreamsBuilder();
+        final KStream<String, String>
+          textLines = builder.stream(TEXT_LINES_TOPIC, Consumed.with(Serdes.String(), Serdes.String()));
+
+        final KGroupedStream<String, String> groupedByWord = textLines
+          .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
+          .groupBy((key, word) -> word, Grouped.with(stringSerde, stringSerde));
+
+        groupedByWord.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("WordCountsStore")
+          .withValueSerde(Serdes.Long()));
+
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-interactive-queries");
+
+        final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
+        streams.cleanUp();
+        streams.start();
+
+        producer.send(new ProducerRecord<String, String>(TEXT_LINES_TOPIC, "1", TEXT_EXAMPLE_1));
+        producer.send(new ProducerRecord<String, String>(TEXT_LINES_TOPIC, "2", TEXT_EXAMPLE_2));
+
+        Thread.sleep(2000);
+        ReadOnlyKeyValueStore<String, Long> keyValueStore =
+          streams.store(StoreQueryParameters.fromNameAndType(
+            "WordCountsStore", QueryableStoreTypes.keyValueStore()));
+
+        KeyValueIterator<String, Long> range = keyValueStore.all();
+        while (range.hasNext()) {
+            KeyValue<String, Long> next = range.next();
+            System.out.println("Count for " + next.key + ": " + next.value);
+        }
+
+        streams.close();
+    }
+
+    private static KafkaProducer<String, String> createKafkaProducer() {
+
+        Properties props = new Properties();
+        props.put(BOOTSTRAP_SERVERS_CONFIG, kafka.getBootstrapServers());
+        props.put(KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+        props.put(VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+
+        return new KafkaProducer(props);
+
+    }
+}
+
+
--- a/libraries-data/src/test/java/com/baeldung/kafkastreams/KafkaStreamsLiveTest.java
+++ b/libraries-data/src/test/java/com/baeldung/kafkastreams/KafkaStreamsLiveTest.java
@ -1,62 +0,0 @@
-package com.baeldung.kafkastreams;
-
-import org.apache.kafka.clients.consumer.ConsumerConfig;
-import org.apache.kafka.common.serialization.Serde;
-import org.apache.kafka.common.serialization.Serdes;
-import org.apache.kafka.streams.KafkaStreams;
-import org.apache.kafka.streams.StreamsConfig;
-import org.apache.kafka.streams.kstream.KStream;
-import org.apache.kafka.streams.kstream.KStreamBuilder;
-import org.apache.kafka.streams.kstream.KTable;
-import org.apache.kafka.test.TestUtils;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import java.util.Arrays;
-import java.util.Properties;
-import java.util.regex.Pattern;
-
-public class KafkaStreamsLiveTest {
-    private String bootstrapServers = "localhost:9092";
-
-    @Test
-    @Ignore("it needs to have kafka broker running on local")
-    public void shouldTestKafkaStreams() throws InterruptedException {
-        //given
-        String inputTopic = "inputTopic";
-
-        Properties streamsConfiguration = new Properties();
-        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-live-test");
-        streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
-        streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
-        streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
-        streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
-        streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
-        // Use a temporary directory for storing state, which will be automatically removed after the test.
-        streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
-
-        //when
-        KStreamBuilder builder = new KStreamBuilder();
-        KStream<String, String> textLines = builder.stream(inputTopic);
-        Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
-
-        KTable<String, Long> wordCounts = textLines
-                .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
-                .groupBy((key, word) -> word)
-                .count();
-
-        wordCounts.foreach((word, count) -> System.out.println("word: " + word + " -> " + count));
-
-        String outputTopic = "outputTopic";
-        final Serde<String> stringSerde = Serdes.String();
-        final Serde<Long> longSerde = Serdes.Long();
-        wordCounts.to(stringSerde, longSerde, outputTopic);
-
-        KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration);
-        streams.start();
-
-        //then
-        Thread.sleep(30000);
-        streams.close();
-    }
-}