java-20079: upgrade apache-spark module libraries (#14087)

This commit is contained in:
Ehsan Sasanianno 2023-05-23 18:57:01 +02:00 committed by GitHub
parent a36cca4ab7
commit e8b723d563
3 changed files with 21 additions and 16 deletions

View File

@ -17,17 +17,17 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_2.12</artifactId>
<version>${org.apache.spark.spark-core.version}</version> <version>${org.apache.spark.spark-core.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_2.12</artifactId>
<version>${org.apache.spark.spark-sql.version}</version> <version>${org.apache.spark.spark-sql.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.11</artifactId> <artifactId>spark-graphx_2.12</artifactId>
<version>${org.apache.spark.spark-graphx.version}</version> <version>${org.apache.spark.spark-graphx.version}</version>
</dependency> </dependency>
<dependency> <dependency>
@ -37,22 +37,22 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId> <artifactId>spark-streaming_2.12</artifactId>
<version>${org.apache.spark.spark-streaming.version}</version> <version>${org.apache.spark.spark-streaming.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.11</artifactId> <artifactId>spark-mllib_2.12</artifactId>
<version>${org.apache.spark.spark-mllib.version}</version> <version>${org.apache.spark.spark-mllib.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId> <artifactId>spark-streaming-kafka-0-10_2.12</artifactId>
<version>${org.apache.spark.spark-streaming-kafka.version}</version> <version>${org.apache.spark.spark-streaming-kafka.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.datastax.spark</groupId> <groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_2.11</artifactId> <artifactId>spark-cassandra-connector_2.12</artifactId>
<version>${com.datastax.spark.spark-cassandra-connector.version}</version> <version>${com.datastax.spark.spark-cassandra-connector.version}</version>
</dependency> </dependency>
<dependency> <dependency>
@ -97,17 +97,17 @@
</repositories> </repositories>
<properties> <properties>
<org.apache.spark.spark-core.version>2.4.8</org.apache.spark.spark-core.version> <org.apache.spark.spark-core.version>3.3.2</org.apache.spark.spark-core.version>
<org.apache.spark.spark-sql.version>2.4.8</org.apache.spark.spark-sql.version> <org.apache.spark.spark-sql.version>3.3.2</org.apache.spark.spark-sql.version>
<org.apache.spark.spark-streaming.version>2.4.8</org.apache.spark.spark-streaming.version> <org.apache.spark.spark-streaming.version>3.3.2</org.apache.spark.spark-streaming.version>
<org.apache.spark.spark-mllib.version>2.4.8</org.apache.spark.spark-mllib.version> <org.apache.spark.spark-mllib.version>3.3.2</org.apache.spark.spark-mllib.version>
<org.apache.spark.spark-graphx.version>2.4.8</org.apache.spark.spark-graphx.version> <org.apache.spark.spark-graphx.version>3.3.2</org.apache.spark.spark-graphx.version>
<graphframes.version>0.8.1-spark3.0-s_2.12</graphframes.version> <graphframes.version>0.8.1-spark3.0-s_2.12</graphframes.version>
<org.apache.spark.spark-streaming-kafka.version>2.4.8</org.apache.spark.spark-streaming-kafka.version> <org.apache.spark.spark-streaming-kafka.version>3.3.2</org.apache.spark.spark-streaming-kafka.version>
<com.datastax.spark.spark-cassandra-connector.version>2.5.2</com.datastax.spark.spark-cassandra-connector.version> <com.datastax.spark.spark-cassandra-connector.version>3.3.0</com.datastax.spark.spark-cassandra-connector.version>
<com.datastax.spark.spark-cassandra-connector-java.version>1.6.0-M1</com.datastax.spark.spark-cassandra-connector-java.version> <com.datastax.spark.spark-cassandra-connector-java.version>1.6.0-M1</com.datastax.spark.spark-cassandra-connector-java.version>
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version> <maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version>
<postgres.version>42.3.3</postgres.version> <postgres.version>42.5.4</postgres.version>
</properties> </properties>
</project> </project>

View File

@ -16,8 +16,11 @@ import org.apache.log4j.Logger;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.Function3;
import org.apache.spark.streaming.Durations; import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.State;
import org.apache.spark.streaming.StateSpec; import org.apache.spark.streaming.StateSpec;
import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream; import org.apache.spark.streaming.api.java.JavaInputDStream;
@ -74,7 +77,8 @@ public class WordCountingAppWithCheckpoint {
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1)) JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
.reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2); .reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);
JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts = wordCounts.mapWithState(StateSpec.function((word, one, state) -> { JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts =
wordCounts.mapWithState(StateSpec.function((Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>) (word, one, state) -> {
int sum = one.orElse(0) + (state.exists() ? state.get() : 0); int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
Tuple2<String, Integer> output = new Tuple2<>(word, sum); Tuple2<String, Integer> output = new Tuple2<>(word, sum);
state.update(sum); state.update(sum);

View File

@ -9,6 +9,7 @@ public class SparkDriver implements Serializable {
public static SparkSession getSparkSession() { public static SparkSession getSparkSession() {
return SparkSession.builder() return SparkSession.builder()
.appName("Customer Aggregation pipeline") .appName("Customer Aggregation pipeline")
.config("spark.sql.legacy.timeParserPolicy", "LEGACY")
.master("local") .master("local")
.getOrCreate(); .getOrCreate();