Bael 1166 intro apache spark (#2812)
* Changes for BAEL-1166 * Changes for BAEL_1166 * Changes for BAEL 1166 * Changes for BAEL 1166
This commit is contained in:
parent
4ebeb99911
commit
3085db7b30
|
@ -0,0 +1,44 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>apache-spark</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>apache-spark</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.10 -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.10</artifactId>
|
||||
<version>2.2.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.7.0</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
|
@ -0,0 +1,53 @@
|
|||
package com.baeldung;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.*;
|
||||
import org.apache.spark.api.java.function.Function2;
|
||||
import org.apache.spark.api.java.function.PairFunction;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
public class WordCount {
|
||||
|
||||
private static final Pattern SPACE = Pattern.compile(" ");
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if (args.length < 1) {
|
||||
System.err.println("Usage: JavaWordCount <file>");
|
||||
System.exit(1);
|
||||
}
|
||||
SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount").setMaster("local");
|
||||
JavaSparkContext ctx = new JavaSparkContext(sparkConf);
|
||||
JavaRDD<String> lines = ctx.textFile(args[0], 1);
|
||||
|
||||
JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
|
||||
JavaPairRDD<String, Integer> ones = words.mapToPair(
|
||||
new PairFunction<String, String, Integer>() {
|
||||
@Override
|
||||
public Tuple2<String, Integer> call(String s) {
|
||||
return new Tuple2<>(s, 1);
|
||||
}
|
||||
});
|
||||
|
||||
JavaPairRDD<String, Integer> counts = ones.reduceByKey(
|
||||
new Function2<Integer, Integer, Integer>() {
|
||||
@Override
|
||||
public Integer call(Integer i1, Integer i2) {
|
||||
return i1 + i2;
|
||||
}
|
||||
});
|
||||
|
||||
List<Tuple2<String, Integer>> output = counts.collect();
|
||||
for (Tuple2<?, ?> tuple : output) {
|
||||
System.out.println(tuple._1() + ": " + tuple._2());
|
||||
}
|
||||
ctx.stop();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
Hello from Baeldung
|
||||
Keep Learning Spark
|
||||
Bye from Baeldung
|
Loading…
Reference in New Issue