您的位置:首页 > 大数据

白话scala系列四 scala矩阵运算和操作

2016-07-04 17:34 477 查看
在做数据挖掘和机器学习项目的时候,发现经常需要用到矩阵运算。虽然 Java 中提供的 Jama 包能实现大部分需求,但是无法满足定制化需求。我们写 Spark 程序的时候一般使用 Scala,所以用 Scala 实现了一个矩阵类。代码实现了矩阵加法、乘法、转置、求协方差、求平均(按列去均值)等操作。后续会继续扩充,用以实现特殊矩阵操作需求。

/**
 * Dense matrix of doubles built from a flat, row-major array.
 *
 * The column count is derived as `ceil(data.length / rownum)`; when `data` does not
 * fill the last row completely, the missing cells are padded with `0.0`.
 *
 * @param data   cell values in row-major order
 * @param rownum number of rows; must be positive unless `data` is empty
 * @throws IllegalArgumentException if `rownum` is negative, or zero with non-empty data
 */
class Matrix(private val data: Array[Double], private val rownum: Int) {
  require(
    rownum > 0 || (rownum == 0 && data.isEmpty),
    s"rownum must be positive for non-empty data, got $rownum"
  )

  /** Number of columns, derived from the data length and the row count. */
  val colnum: Int = (data.length.toDouble / rownum).ceil.toInt

  // Row-major 2-D view of `data`, zero-padded where `data` runs short.
  private val matrix: Array[Array[Double]] =
    Array.tabulate(rownum, colnum) { (i, j) =>
      val index = i * colnum + j
      if (data.isDefinedAt(index)) data(index) else 0.0
    }

  /** Renders one row per line, cells separated by a single space. */
  override def toString: String =
    matrix.map(_.mkString(" ")).mkString("\n")

  /**
   * Returns the cell at the given position.
   *
   * @param row 1-based row index
   * @param col 1-based column index
   */
  def mat(row: Int, col: Int): Double =
    matrix(row - 1)(col - 1)

  /**
   * Row-major product of a `rows x inner` array with an `inner x cols` array.
   * A plain while loop keeps the accumulation on primitive doubles and in the
   * same left-to-right order as a naive triple loop.
   */
  private def product(
      left: Array[Array[Double]],
      right: Array[Array[Double]],
      rows: Int,
      inner: Int,
      cols: Int
  ): Array[Double] = {
    val out = new Array[Double](rows * cols)
    for (i <- 0 until rows; j <- 0 until cols) {
      var acc = 0.0
      var k = 0
      while (k < inner) {
        acc += left(i)(k) * right(k)(j)
        k += 1
      }
      out(i * cols + j) = acc
    }
    out
  }

  /**
   * Matrix product `this * a`.
   *
   * @param a right-hand operand; its row count must equal this matrix's column count
   * @return a new matrix with this matrix's row count and `a`'s column count
   * @throws IllegalArgumentException if the inner dimensions differ
   */
  def *(a: Matrix): Matrix = {
    require(
      colnum == a.rownum,
      s"cannot multiply a ${rownum}x$colnum matrix by a ${a.rownum}x${a.colnum} matrix"
    )
    new Matrix(product(matrix, a.matrix, rownum, colnum, a.colnum), rownum)
  }

  /**
   * Scalar product: every cell multiplied by `a`.
   *
   * @param a the scalar factor
   * @return a new matrix of the same shape
   */
  def *(a: Double): Matrix =
    new Matrix(matrix.flatten.map(_ * a), rownum)

  /**
   * Element-wise sum.
   *
   * @param a matrix of the same shape as this one
   * @return a new matrix of the same shape
   * @throws IllegalArgumentException if the shapes differ
   */
  def +(a: Matrix): Matrix = {
    require(
      rownum == a.rownum && colnum == a.colnum,
      s"cannot add a ${rownum}x$colnum matrix to a ${a.rownum}x${a.colnum} matrix"
    )
    val sums = Array.tabulate(rownum, colnum)((i, j) => matrix(i)(j) + a.matrix(i)(j))
    new Matrix(sums.flatten, rownum)
  }

  /** Returns the transpose: a new matrix with rows and columns swapped. */
  def transpose(): Matrix = {
    val flipped = Array.tabulate(colnum, rownum)((j, i) => matrix(i)(j))
    new Matrix(flipped.flatten, colnum)
  }

  /**
   * Computes `(X^T * X) / rownum`, where `X` is this matrix.
   *
   * No centering is done here, and the divisor is the row count (not `rownum - 1`).
   * Call this on the result of [[mean]] to obtain a covariance matrix of the columns.
   *
   * @return a `colnum x colnum` matrix
   */
  def cov(): Matrix = {
    // Transpose once; rebuilding it per loop iteration is pure waste.
    val t = transpose()
    val gram = product(t.matrix, matrix, t.rownum, t.colnum, colnum)
    new Matrix(gram, t.rownum) * (1.toDouble / rownum)
  }

  /**
   * Returns the column-centered matrix: each cell minus the mean of its column.
   *
   * Despite the name, the result is not the means themselves but the original
   * values with the per-column mean subtracted.
   *
   * @return a new matrix of the same shape
   */
  def mean(): Matrix = {
    val columnMeans = Array.tabulate(colnum) { j =>
      (0 until rownum).foldLeft(0.0)((acc, i) => acc + matrix(i)(j)) / rownum
    }
    val centered = Array.tabulate(rownum, colnum)((i, j) => matrix(i)(j) - columnMeans(j))
    new Matrix(centered.flatten, rownum)
  }
}


实验验证:

// Build a 150-row matrix from the flat values returned by getDateFromFile.
// NOTE(review): getDateFromFile is defined outside this snippet — presumably it
// loads the validation data listed below as a row-major Array[Double]; confirm.
val matrix = new Matrix(getDateFromFile,150)

// mean returns the column-centered matrix; cov then computes (X^T * X) / rownum on it.
println(matrix.mean.cov)

验证数据(矩阵):

5.1, 3.5, 1.4 , 0.2

4.9, 3.0, 1.4 , 0.2

4.7, 3.2, 1.3 , 0.2

4.6, 3.1, 1.5 , 0.2

5.0, 3.6, 1.4 , 0.2

5.4, 3.9, 1.7 , 0.4

4.6, 3.4, 1.4 , 0.3

5.0, 3.4, 1.5 , 0.2

4.4, 2.9, 1.4 , 0.2

4.9, 3.1, 1.5 , 0.1

5.4, 3.7, 1.5 , 0.2

4.8, 3.4, 1.6 , 0.2

4.8, 3.0, 1.4 , 0.1

4.3, 3.0, 1.1 , 0.1

5.8, 4.0, 1.2 , 0.2

5.7, 4.4, 1.5 , 0.4

5.4, 3.9, 1.3 , 0.4

5.1, 3.5, 1.4 , 0.3

5.7, 3.8, 1.7 , 0.3

5.1, 3.8, 1.5 , 0.3

5.4, 3.4, 1.7 , 0.2

5.1, 3.7, 1.5 , 0.4

4.6, 3.6, 1.0 , 0.2

5.1, 3.3, 1.7 , 0.5

4.8, 3.4, 1.9 , 0.2

5.0, 3.0, 1.6 , 0.2

5.0, 3.4, 1.6 , 0.4

5.2, 3.5, 1.5 , 0.2

5.2, 3.4, 1.4 , 0.2

4.7, 3.2, 1.6 , 0.2

4.8, 3.1, 1.6 , 0.2

5.4, 3.4, 1.5 , 0.4

5.2, 4.1, 1.5 , 0.1

5.5, 4.2, 1.4 , 0.2

4.9, 3.1, 1.5 , 0.1

5.0, 3.2, 1.2 , 0.2

5.5, 3.5, 1.3 , 0.2

4.9, 3.1, 1.5 , 0.1

4.4, 3.0, 1.3 , 0.2

5.1, 3.4, 1.5 , 0.2

5.0, 3.5, 1.3 , 0.3

4.5, 2.3, 1.3 , 0.3

4.4, 3.2, 1.3 , 0.2

5.0, 3.5, 1.6 , 0.6

5.1, 3.8, 1.9 , 0.4

4.8, 3.0, 1.4 , 0.3

5.1, 3.8, 1.6 , 0.2

4.6, 3.2, 1.4 , 0.2

5.3, 3.7, 1.5 , 0.2

5.0, 3.3, 1.4 , 0.2

7.0, 3.2, 4.7 , 1.4

6.4, 3.2, 4.5 , 1.5

6.9, 3.1, 4.9 , 1.5

5.5, 2.3, 4.0 , 1.3

6.5, 2.8, 4.6 , 1.5

5.7, 2.8, 4.5 , 1.3

6.3, 3.3, 4.7 , 1.6

4.9, 2.4, 3.3 , 1.0

6.6, 2.9, 4.6 , 1.3

5.2, 2.7, 3.9 , 1.4

5.0, 2.0, 3.5 , 1.0

5.9, 3.0, 4.2 , 1.5

6.0, 2.2, 4.0 , 1.0

6.1, 2.9, 4.7 , 1.4

5.6, 2.9, 3.6 , 1.3

6.7, 3.1, 4.4 , 1.4

5.6, 3.0, 4.5 , 1.5

5.8, 2.7, 4.1 , 1.0

6.2, 2.2, 4.5 , 1.5

5.6, 2.5, 3.9 , 1.1

5.9, 3.2, 4.8 , 1.8

6.1, 2.8, 4.0 , 1.3

6.3, 2.5, 4.9 , 1.5

6.1, 2.8, 4.7 , 1.2

6.4, 2.9, 4.3 , 1.3

6.6, 3.0, 4.4 , 1.4

6.8, 2.8, 4.8 , 1.4

6.7, 3.0, 5.0 , 1.7

6.0, 2.9, 4.5 , 1.5

5.7, 2.6, 3.5 , 1.0

5.5, 2.4, 3.8 , 1.1

5.5, 2.4, 3.7 , 1.0

5.8, 2.7, 3.9 , 1.2

6.0, 2.7, 5.1 , 1.6

5.4, 3.0, 4.5 , 1.5

6.0, 3.4, 4.5 , 1.6

6.7, 3.1, 4.7 , 1.5

6.3, 2.3, 4.4 , 1.3

5.6, 3.0, 4.1 , 1.3

5.5, 2.5, 4.0 , 1.3

5.5, 2.6, 4.4 , 1.2

6.1, 3.0, 4.6 , 1.4

5.8, 2.6, 4.0 , 1.2

5.0, 2.3, 3.3 , 1.0

5.6, 2.7, 4.2 , 1.3

5.7, 3.0, 4.2 , 1.2

5.7, 2.9, 4.2 , 1.3

6.2, 2.9, 4.3 , 1.3

5.1, 2.5, 3.0 , 1.1

5.7, 2.8, 4.1 , 1.3

6.3, 3.3, 6.0 , 2.5

5.8, 2.7, 5.1 , 1.9

7.1, 3.0, 5.9 , 2.1

6.3, 2.9, 5.6 , 1.8

6.5, 3.0, 5.8 , 2.2

7.6, 3.0, 6.6 , 2.1

4.9, 2.5, 4.5 , 1.7

7.3, 2.9, 6.3 , 1.8

6.7, 2.5, 5.8 , 1.8

7.2, 3.6, 6.1 , 2.5

6.5, 3.2, 5.1 , 2.0

6.4, 2.7, 5.3 , 1.9

6.8, 3.0, 5.5 , 2.1

5.7, 2.5, 5.0 , 2.0

5.8, 2.8, 5.1 , 2.4

6.4, 3.2, 5.3 , 2.3

6.5, 3.0, 5.5 , 1.8

7.7, 3.8, 6.7 , 2.2

7.7, 2.6, 6.9 , 2.3

6.0, 2.2, 5.0 , 1.5

6.9, 3.2, 5.7 , 2.3

5.6, 2.8, 4.9 , 2.0

7.7, 2.8, 6.7 , 2.0

6.3, 2.7, 4.9 , 1.8

6.7, 3.3, 5.7 , 2.1

7.2, 3.2, 6.0 , 1.8

6.2, 2.8, 4.8 , 1.8

6.1, 3.0, 4.9 , 1.8

6.4, 2.8, 5.6 , 2.1

7.2, 3.0, 5.8 , 1.6

7.4, 2.8, 6.1 , 1.9

7.9, 3.8, 6.4 , 2.0

6.4, 2.8, 5.6 , 2.2

6.3, 2.8, 5.1 , 1.5

6.1, 2.6, 5.6 , 1.4

7.7, 3.0, 6.1 , 2.3

6.3, 3.4, 5.6 , 2.4

6.4, 3.1, 5.5 , 1.8

6.0, 3.0, 4.8 , 1.8

6.9, 3.1, 5.4 , 2.1

6.7, 3.1, 5.6 , 2.4

6.9, 3.1, 5.1 , 2.3

5.8, 2.7, 5.1 , 1.9

6.8, 3.2, 5.9 , 2.3

6.7, 3.3, 5.7 , 2.5

6.7, 3.0, 5.2 , 2.3

6.3, 2.5, 5.0 , 1.9

6.5, 3.0, 5.2 , 2.0

6.2, 3.4, 5.4 , 2.3

5.9, 3.0, 5.1 , 1.8

验证结果:

0.6811222222222222 -0.03900666666666667 1.2651911111111114 0.513457777777778

-0.03900666666666667 0.18675066666666673 -0.31956800000000013 -0.11719466666666663

1.2651911111111114 -0.31956800000000013 3.092424888888886 1.2877448888888894

0.513457777777778 -0.11719466666666663 1.2877448888888894 0.5785315555555559
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  scala 大数据分析