Spark 2.0从入门到精通245讲——操作RDD(transformation案例实战)
2017-10-12 17:19
791 查看
package cn.spark.study.core

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Demonstrates common RDD transformation operators: map, filter, flatMap,
 * groupByKey, reduceByKey, sortByKey and join.
 *
 * Each demo method builds its own local-mode SparkContext, runs a single
 * transformation over a small in-memory dataset, prints the result with
 * foreach, and stops the context so resources are released. Only join() is
 * invoked from main(); uncomment the other calls to run the remaining demos.
 *
 * @author Administrator
 */
object TransformationOperation {

  def main(args: Array[String]) {
    // map()
    // filter()
    // flatMap()
    // groupByKey()
    // reduceByKey()
    // sortByKey()
    join()
  }

  /**
   * Builds a local-mode SparkContext for a single demo.
   * Extracted to avoid repeating the SparkConf boilerplate in every method.
   *
   * @param appName application name shown in the Spark UI/logs
   */
  private def createContext(appName: String): SparkContext = {
    val conf = new SparkConf()
      .setAppName(appName)
      .setMaster("local")
    new SparkContext(conf)
  }

  /** map: applies a function to every element — here, doubles each number. */
  def map() {
    val sc = createContext("map")
    val numbers = Array(1, 2, 3, 4, 5)
    val numberRDD = sc.parallelize(numbers, 1)
    val multipleNumberRDD = numberRDD.map { num => num * 2 }
    multipleNumberRDD.foreach { num => println(num) }
    sc.stop() // release the context; only one active SparkContext is allowed per JVM
  }

  /** filter: keeps only elements satisfying a predicate — here, even numbers. */
  def filter() {
    val sc = createContext("filter")
    val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    val numberRDD = sc.parallelize(numbers, 1)
    val evenNumberRDD = numberRDD.filter { num => num % 2 == 0 }
    evenNumberRDD.foreach { num => println(num) }
    sc.stop()
  }

  /** flatMap: maps each element to 0..n outputs — here, splits lines into words. */
  def flatMap() {
    val sc = createContext("flatMap")
    val lineArray = Array("hello you", "hello me", "hello world")
    val lines = sc.parallelize(lineArray, 1)
    val words = lines.flatMap { line => line.split(" ") }
    words.foreach { word => println(word) }
    sc.stop()
  }

  /** groupByKey: groups values sharing a key — here, all scores per class. */
  def groupByKey() {
    val sc = createContext("groupByKey")
    val scoreList = Array(("class1", 80), ("class2", 75),
        ("class1", 90), ("class2", 60))
    val scores = sc.parallelize(scoreList, 1)
    val groupedScores = scores.groupByKey()
    groupedScores.foreach(score => {
      println(score._1)
      score._2.foreach { singleScore => println(singleScore) }
      println("=============================")
    })
    sc.stop()
  }

  /** reduceByKey: merges values per key with a function — here, sums scores per class. */
  def reduceByKey() {
    // Fixed: app name was "groupByKey" (copy-paste error from the method above).
    val sc = createContext("reduceByKey")
    val scoreList = Array(("class1", 80), ("class2", 75),
        ("class1", 90), ("class2", 60))
    val scores = sc.parallelize(scoreList, 1)
    val totalScores = scores.reduceByKey(_ + _)
    totalScores.foreach(classScore => println(classScore._1 + ": " + classScore._2))
    sc.stop()
  }

  /** sortByKey: sorts pairs by key — here, descending (false) by score. */
  def sortByKey() {
    val sc = createContext("sortByKey")
    val scoreList = Array((65, "leo"), (50, "tom"),
        (100, "marry"), (85, "jack"))
    val scores = sc.parallelize(scoreList, 1)
    val sortedScores = scores.sortByKey(false)
    sortedScores.foreach(studentScore =>
      println(studentScore._1 + ": " + studentScore._2))
    sc.stop()
  }

  /** join: inner-joins two pair RDDs on the key — here, student names with scores. */
  def join() {
    val sc = createContext("join")
    val studentList = Array(
        (1, "leo"),
        (2, "jack"),
        (3, "tom"))
    val scoreList = Array(
        (1, 100),
        (2, 90),
        (3, 60))
    val students = sc.parallelize(studentList)
    val scores = sc.parallelize(scoreList)
    // Result element type: (studentId, (name, score))
    val studentScores = students.join(scores)
    studentScores.foreach(studentScore => {
      println("student id: " + studentScore._1)
      println("student name: " + studentScore._2._1)
      // Fixed typo in the original message: "socre" -> "score".
      println("student score: " + studentScore._2._2)
      println("=======================================")
    })
    sc.stop()
  }

  /** cogroup: placeholder left unimplemented in the original tutorial. */
  def cogroup() {

  }

}
相关文章推荐
- Spark 2.0从入门到精通245讲——操作RDD(action案例实战)
- Spark RDD/Core 编程 API入门系列 之rdd实战(rdd基本操作实战及transformation和action流程图)(源码)(三)
- Spark核心编程:操作RDD(transformation和action案例实战)
- Spark 2.0从入门到精通:Scala编程、大数据开发、上百个实战案例、内核源码深度剖析(278讲全)
- spark实战之RDD的cache或persist操作不会触发transformation计算
- Spark修炼之道(进阶篇)——Spark入门到精通:第十节 Spark SQL案例实战(一)
- Spark RDD/Core 编程 API入门系列之动手实战和调试Spark文件操作、动手实战操作搜狗日志文件、搜狗日志文件深入实战(二)
- 03、操作RDD(transformation和action案例实战)
- spark的RDD中的action(执行)和transformation(转换)两种操作中常见函数介绍
- Scala入门到精通——第四节 Set、Map、Tuple、队列操作实战
- SPARK 使用Java 在IDE中实战RDD和DataFrame动态转换操作
- 精通Spark:Spark内核剖析、源码解读、性能优化和商业案例实战
- 【备忘】年薪50万2017年最新北风网Spark2.0从入门到精通教程
- 【Spark】RDD操作详解1——Transformation和Actions概况
- spark RDD transformation和action操作
- Spark RDD概念学习系列之transformation操作
- 《Spark商业案例与性能调优实战100课》第1课:商业案例之通过RDD实现分析大数据电影点评系统中电影的用户行为信息
- Spark入门到精通视频学习资料--第八章:项目实战(2讲)
- 【备忘】年薪50万2017年最新北风网Spark2.0从入门到精通教程
- Scala入门到精通——第四节 Set、Map、Tuple、队列操作实战