java-20079: upgrade apache-spark module libraries (#14087)
This commit is contained in:
parent
a36cca4ab7
commit
e8b723d563
|
@ -17,17 +17,17 @@
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
<artifactId>spark-core_2.12</artifactId>
|
||||||
<version>${org.apache.spark.spark-core.version}</version>
|
<version>${org.apache.spark.spark-core.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
<artifactId>spark-sql_2.12</artifactId>
|
||||||
<version>${org.apache.spark.spark-sql.version}</version>
|
<version>${org.apache.spark.spark-sql.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-graphx_2.11</artifactId>
|
<artifactId>spark-graphx_2.12</artifactId>
|
||||||
<version>${org.apache.spark.spark-graphx.version}</version>
|
<version>${org.apache.spark.spark-graphx.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -37,22 +37,22 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-streaming_2.11</artifactId>
|
<artifactId>spark-streaming_2.12</artifactId>
|
||||||
<version>${org.apache.spark.spark-streaming.version}</version>
|
<version>${org.apache.spark.spark-streaming.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-mllib_2.11</artifactId>
|
<artifactId>spark-mllib_2.12</artifactId>
|
||||||
<version>${org.apache.spark.spark-mllib.version}</version>
|
<version>${org.apache.spark.spark-mllib.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
|
<artifactId>spark-streaming-kafka-0-10_2.12</artifactId>
|
||||||
<version>${org.apache.spark.spark-streaming-kafka.version}</version>
|
<version>${org.apache.spark.spark-streaming-kafka.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.datastax.spark</groupId>
|
<groupId>com.datastax.spark</groupId>
|
||||||
<artifactId>spark-cassandra-connector_2.11</artifactId>
|
<artifactId>spark-cassandra-connector_2.12</artifactId>
|
||||||
<version>${com.datastax.spark.spark-cassandra-connector.version}</version>
|
<version>${com.datastax.spark.spark-cassandra-connector.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -97,17 +97,17 @@
|
||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<org.apache.spark.spark-core.version>2.4.8</org.apache.spark.spark-core.version>
|
<org.apache.spark.spark-core.version>3.3.2</org.apache.spark.spark-core.version>
|
||||||
<org.apache.spark.spark-sql.version>2.4.8</org.apache.spark.spark-sql.version>
|
<org.apache.spark.spark-sql.version>3.3.2</org.apache.spark.spark-sql.version>
|
||||||
<org.apache.spark.spark-streaming.version>2.4.8</org.apache.spark.spark-streaming.version>
|
<org.apache.spark.spark-streaming.version>3.3.2</org.apache.spark.spark-streaming.version>
|
||||||
<org.apache.spark.spark-mllib.version>2.4.8</org.apache.spark.spark-mllib.version>
|
<org.apache.spark.spark-mllib.version>3.3.2</org.apache.spark.spark-mllib.version>
|
||||||
<org.apache.spark.spark-graphx.version>2.4.8</org.apache.spark.spark-graphx.version>
|
<org.apache.spark.spark-graphx.version>3.3.2</org.apache.spark.spark-graphx.version>
|
||||||
<graphframes.version>0.8.1-spark3.0-s_2.12</graphframes.version>
|
<graphframes.version>0.8.1-spark3.0-s_2.12</graphframes.version>
|
||||||
<org.apache.spark.spark-streaming-kafka.version>2.4.8</org.apache.spark.spark-streaming-kafka.version>
|
<org.apache.spark.spark-streaming-kafka.version>3.3.2</org.apache.spark.spark-streaming-kafka.version>
|
||||||
<com.datastax.spark.spark-cassandra-connector.version>2.5.2</com.datastax.spark.spark-cassandra-connector.version>
|
<com.datastax.spark.spark-cassandra-connector.version>3.3.0</com.datastax.spark.spark-cassandra-connector.version>
|
||||||
<com.datastax.spark.spark-cassandra-connector-java.version>1.6.0-M1</com.datastax.spark.spark-cassandra-connector-java.version>
|
<com.datastax.spark.spark-cassandra-connector-java.version>1.6.0-M1</com.datastax.spark.spark-cassandra-connector-java.version>
|
||||||
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version>
|
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version>
|
||||||
<postgres.version>42.3.3</postgres.version>
|
<postgres.version>42.5.4</postgres.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -16,8 +16,11 @@ import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.api.java.Optional;
|
||||||
import org.apache.spark.api.java.function.Function2;
|
import org.apache.spark.api.java.function.Function2;
|
||||||
|
import org.apache.spark.api.java.function.Function3;
|
||||||
import org.apache.spark.streaming.Durations;
|
import org.apache.spark.streaming.Durations;
|
||||||
|
import org.apache.spark.streaming.State;
|
||||||
import org.apache.spark.streaming.StateSpec;
|
import org.apache.spark.streaming.StateSpec;
|
||||||
import org.apache.spark.streaming.api.java.JavaDStream;
|
import org.apache.spark.streaming.api.java.JavaDStream;
|
||||||
import org.apache.spark.streaming.api.java.JavaInputDStream;
|
import org.apache.spark.streaming.api.java.JavaInputDStream;
|
||||||
|
@ -74,7 +77,8 @@ public class WordCountingAppWithCheckpoint {
|
||||||
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
|
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
|
||||||
.reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);
|
.reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);
|
||||||
|
|
||||||
JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts = wordCounts.mapWithState(StateSpec.function((word, one, state) -> {
|
JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts =
|
||||||
|
wordCounts.mapWithState(StateSpec.function((Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>) (word, one, state) -> {
|
||||||
int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
|
int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
|
||||||
Tuple2<String, Integer> output = new Tuple2<>(word, sum);
|
Tuple2<String, Integer> output = new Tuple2<>(word, sum);
|
||||||
state.update(sum);
|
state.update(sum);
|
||||||
|
|
|
@ -9,6 +9,7 @@ public class SparkDriver implements Serializable {
|
||||||
public static SparkSession getSparkSession() {
|
public static SparkSession getSparkSession() {
|
||||||
return SparkSession.builder()
|
return SparkSession.builder()
|
||||||
.appName("Customer Aggregation pipeline")
|
.appName("Customer Aggregation pipeline")
|
||||||
|
.config("spark.sql.legacy.timeParserPolicy", "LEGACY")
|
||||||
.master("local")
|
.master("local")
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue