您的位置：首页 > 其它

Scala入门学习之【wordcount】

2017-11-02 17:12 330 查看

[plain] view
plain copy

// Word count over a small in-memory list of lines, built up one transformation at a time.
// Each stage is bound to a name and printed, so every intermediate result can be inspected.
// The "Sample output" comments are the results noted in the original write-up; the order of
// Map entries is an implementation detail and should not be relied on.
val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")

// Step 1: split every line on a single space and flatten the pieces into one list of words.
// flatMap hands the function one whole line at a time (there are three lines), so the first
// call is equivalent to "hello tom hello jerry".split(" ").
val words = lines.flatMap(line => line.split(" "))
println(words)
// Sample output: List(hello, tom, hello, jerry, hello, jerry, hello, kitty)

// Step 2: pair every word with the count 1 (one occurrence seen).
// The result is a List of tuples; the first element of a tuple is read with ._1,
// e.g. ("hello", 1)._1 evaluates to hello.
val pairs = words.map(word => (word, 1))
println(pairs)
// Sample output: List((hello,1), (tom,1), (hello,1), (jerry,1), (hello,1), (jerry,1), (hello,1), (kitty,1))

// Step 3: group the tuples by their first field (the word). The word becomes the key and
// the value is the list of all (word, 1) tuples that share that word; the result is a Map.
val grouped = pairs.groupBy { case (word, _) => word }
println(grouped)
// Sample output: Map(tom -> List((tom,1)), kitty -> List((kitty,1)), jerry -> List((jerry,1), (jerry,1)), hello -> List((hello,1), (hello,1), (hello,1), (hello,1)))

// Step 4: collapse each group into a total. mapValues transforms only the values and keeps
// the keys as they are. Each value is a List of tuples; foldLeft(0) starts the running sum
// at 0 and adds the second field of every tuple to it.
// NOTE(review): on Scala 2.13+ mapValues is deprecated and returns a MapView, so the printed
// form of this step may differ from the sample below - confirm against the Scala version in use.
val counts = grouped.mapValues { occurrences =>
  occurrences.foldLeft(0) { case (sum, (_, n)) => sum + n }
}
println(counts)
// Sample output: Map(tom -> 1, kitty -> 1, jerry -> 2, hello -> 4)

// Step 5: convert the Map into a List of (word, count) tuples so that it can be sorted.
val countList = counts.toList
println(countList)
// Sample output: List((tom,1), (kitty,1), (jerry,2), (hello,4))

// Step 6: sort the tuples by their second field (the count), in ascending order.
val ascending = countList.sortBy { case (_, n) => n }
println(ascending)
// Sample output: List((tom,1), (kitty,1), (jerry,2), (hello,4))

// Step 7: reverse the ascending list. reverse itself does not sort; it only flips the list,
// which turns the ascending result into descending order by count.
println(ascending.reverse)
// Sample output: List((hello,4), (jerry,2), (kitty,1), (tom,1))

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航