您的位置:首页 > 数据库

spark-sparkSQL

2014-11-06 11:33 89 查看
package com.spark.test

import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.sql.SQLContext

// One parsed row of people.txt: a person's name and age in years.
// NOTE(review): Scala case classes are already Serializable, so the explicit
// `extends java.io.Serializable` is redundant but harmless; kept byte-identical.
case class Person(name: String, age: Int) extends java.io.Serializable

object PeopleDao {

  /**
   * Entry point of the example job.
   *
   * Reads `/ruson/people.txt` (one "name,age" record per line) from HDFS,
   * converts each line into a [[Person]], registers the result as the SQL
   * table "people", selects every teenager (age 13 through 19, inclusive),
   * and prints each matching name to stdout.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("People")
    val context = new SparkContext(conf)
    val sql = new SQLContext(context)
    // Brings in the implicit conversion that turns an RDD of case classes
    // into a SchemaRDD (Spark 1.0.x API).
    import sql.createSchemaRDD

    // Parse each "name,age" line into a Person record.
    val people = context
      .textFile("/ruson/people.txt")
      .map(_.split(","))
      .map(fields => Person(fields(0), fields(1).trim.toInt))

    // Spark 1.0.0 API; Spark 1.1.0 renamed this to registerTempTable("people").
    people.registerAsTable("people")

    val teenagers = sql.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    teenagers.map(row => "Name: " + row(0)).collect().foreach(println)
  }
}

用 Eclipse 打包成 jar,并将 people.txt 文件上传到 HDFS 上的 /ruson 目录下

在shell下运行:

./bin/spark-submit \

--class com.spark.test.PeopleDao \

--master spark://datanode3:7077 \

--executor-memory 1G \

--total-executor-cores 4 \

/home/lixun/people.jar
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: