sklearn 源码解析 基本线性模型 岭回归 ridge.py(1)
2016-08-18 07:21
381 查看
对于前面已经提到的类及一些细节不再给出。对于稀疏矩阵的了解是必要的。
from abc import ABCMeta, abstractmethod
import warnings
import numpy as np
from scipy import linalg
from scipy import sparse
from scipy.sparse import linalg as sp_linalg
from .base import LinearClassifierMixin, LinearModel, _rescale_data
from .sag import sag_solver :是一种随机平均梯度下降式的求解岭回归与logistic回归的包,
使用梯度下降法,
这种算法收敛很快。
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
from ..utils.extmath import row_norms: 行范数,不支持稀疏矩阵。
from ..utils import check_X_y
from ..utils import check_array :转换为ndarray类型。
from ..utils import check_consistent_length: 检查一个ndarray的list 是否所有元素第一个
维度都相等。(i.e. same length)
from ..utils import column_or_1d: 特殊的拉直函数,接受类似feature形式的ndarray,
即在第二个维度上只有1维,并将其按列拉直为一维数组。
from ..preprocessing import LabelBinarizer:
对一对多问题将标签进行二值化。
from ..model_selection import GridSearchCV
from ..externals import six
from ..metrics.scorer import check_scoring: 对能够进行返回score估计的模型,返回进行
score计算的函数。
下面先不对具体的类,而是接口进行说明。(无组织架构)
sp_linalg.aslinearoperator: 将对象(ndarray, sparse, matrix an so on)转换为线性算子。
np.empty(shape): 返回相应shape的未初始化的ndarray.
sp_linalg.cg: 使用共轭梯度(Conjugate Gradient)法解线性系统。
sp_linalg.lsqr: LSQR迭代算法,求解(阻尼)最小二乘问题(基于Lanczos双对角化,并非QR分解)。
ndarray.flat:(。。。)
np.atleast_1d: 标量化为一维数组,高维保持。
_solve_sparse_cg:
这里将在理论意义上求逆矩阵的过程转换为线性系统的解。
所用到的线性算子仅仅是定义了对于矩阵的变换,matvec 向量乘积变换,
rmatvec 共轭转置变换。
函数分n_features 与n_samples的大小进行了分类,可以证明在取逆的情况
下是相等的。(Kernel Ridge Regression.pdf)
返回系数
_solve_lsqr:
返回线性系统的最小二乘解。
_solve_cholesky:
由于在linalg.solve中选择了sym_pos为True,故指定了矩阵为对称正定矩阵,
采用cholesky分解来解(分解为三角阵的内积)
A.flat[::n_features + 1]实现了对对角元素的简写。
_solve_cholesky_kernel:
该函数允许对样本赋予权重,唯一与_solve_cholesky的不同是其使用的核
矩阵需要给出。
还提供了当解等式linalg.solve失效时使用linalg.lstsq求最小二乘解的方案。
_solve_svd:
这里仅仅是将上面_solve_cholesky_kernel的过程先将X进行奇异值分解,
将奇异值推导的岭回归结果求解出来。
这里以1e-15为阈值,砍掉了小的奇异值(与scipy.linalg.pinv求广义逆矩阵
特征值阈值相同)
ridge_regression:
这仅仅是对上面所提到的具体方法的统一接口。
岭回归当数据阵为sparse且需要拟合截距时仅能用随机平均梯度下降法"sag"求解。
当solver选项为'auto'时,对非稀疏矩阵或有加权的情况,采用cholesky
分解法解,否则使用sparse_cg
n_features > n_samples 在cholesky分解下采用核方法(仅仅是不变的线性核)
从这里可以看到当矩阵为奇异时采用svd奇异值分解求解。
svd不支持稀疏矩阵求解。
from abc import ABCMeta, abstractmethod
import warnings
import numpy as np
from scipy import linalg
from scipy import sparse
from scipy.sparse import linalg as sp_linalg
from .base import LinearClassifierMixin, LinearModel, _rescale_data
from .sag import sag_solver :是一种随机平均梯度下降式的求解岭回归与logistic回归的包,
使用梯度下降法,
这种算法收敛很快。
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
from ..utils.extmath import row_norms: 行范数,不支持稀疏矩阵。
from ..utils import check_X_y
from ..utils import check_array :转换为ndarray类型。
from ..utils import check_consistent_length: 检查一个ndarray的list 是否所有元素第一个
维度都相等。(i.e. same length)
from ..utils import column_or_1d: 特殊的拉直函数,接受类似feature形式的ndarray,
即在第二个维度上只有1维,并将其按列拉直为一维数组。
from ..preprocessing import LabelBinarizer:
对一对多问题将标签进行二值化。
from ..model_selection import GridSearchCV
from ..externals import six
from ..metrics.scorer import check_scoring: 对能够进行返回score估计的模型,返回进行
score计算的函数。
下面先不对具体的类,而是接口进行说明。(无组织架构)
sp_linalg.aslinearoperator: 将对象(ndarray, sparse, matrix an so on)转换为线性算子。
np.empty(shape): 返回相应shape的未初始化的ndarray.
sp_linalg.cg: 使用共轭梯度(Conjugate Gradient)法解线性系统。
sp_linalg.lsqr: LSQR迭代算法,求解(阻尼)最小二乘问题(基于Lanczos双对角化,并非QR分解)。
ndarray.flat:(。。。)
np.atleast_1d: 标量化为一维数组,高维保持。
def _solve_sparse_cg(X, y, alpha, max_iter = None, tol = 1e-3, verbose = 0): n_samples, n_features = X.shape X1 = sp_linalg.aslinearoperator(X) coefs = np.empty((y.shape[1], n_features)) if n_features > n_samples: def create_mv(curr_alpha): def _mv(x): return X1.matvec(X1.rmatvec(x)) + curr_alpha * x return _mv else: def create_mv(curr_alpha): def _mv(curr_alpha): return X1.rmatvec(X1.matvec(x)) + curr_alpha * x return _mv for i in range(y.shape[1]): y_column = y[:, i] mv = create_mv(alpha[i]) if n_features > n_samples: C = sp_linalg.LinearOperator((n_samples, n_samples), matvec = mv, dtype = x.dtype) coef, info = sp_linalg.cg(C, y_column, tol = tol) coefs[i] = X1.rmatvec(coef) else: y_column = X1.rmatvec(y_column) C = sp_linalg.LinearOperator((n_features, n_features), matvec = mv, dtype = x.dtype) coefs[i], info = sp_linalg.cg(C, y_column, maxiter = max_iter, tol = tol) if info < 0: raise ValueError("Failed with error code %d" % info) if max_iter is None and info > 0 and verbose: warning.warn("sparse_cg did not coverge sfter %d iterations." % info) return coefs
_solve_sparse_cg:
这里将在理论意义上求逆矩阵的过程转换为线性系统的解。
所用到的线性算子仅仅是定义了对于矩阵的变换,matvec 向量乘积变换,
rmatvec 共轭转置变换。
函数分n_features 与n_samples的大小进行了分类,可以证明在取逆的情况
下是相等的。(Kernel Ridge Regression.pdf)
返回系数
def _solve_lsqr(X, y, alpha, max_iter = None, tol = 1e-3): n_samples, n_features = X.shape coefs = np.empty((y.shape[1], n_features)) n_iter = np.empty(y.shape[1], dtype = np.int32) sqrt_alpha = np.sqrt(alpha) for i in range(y.shape[1]): y_column = y[:,i] info = sp_linalg.lsqr(X, y_column, damp = sqrt_alpha[i], atol = tol, btol= tol, iter_lim = max_iter) coefs[i] = info[0] n_iter[i] = info[2] return coefs, n_iter
_solve_lsqr:
返回线性系统的最小二乘解。
def _solve_cholesky(X, y, alpha): n_samples, n_features = X.shape n_targets = y.shape[1] A = safe_sparse_dot(X.T, X, dense_output = True) Xy = safe_sparse_dot(X.T, y, dense_output = True) one_alpha = np.array_equal(alpha, len(alpha) * [alpha[0]]) if one_alpha: A.flat[::n_features + 1] ++ alpha[0] return linalg.solve(A, Xy, sym_pos = True, overwrite_a = True) else: coefs = np.empty([n_targets, n_features]) for coef, target , curr_alpha in zip(coefs, Xy.T, alpha): A.flat[::n_features + 1] += curr_alpha coef[:] = linalg.solve(A, target, sym_pos = True, overwrite_a = False).ravel() A.flat[::n_features + 1] -= curr_alpha return coefs
_solve_cholesky:
由于在linalg.solve中选择了sym_pos为True,故指定了矩阵为对称正定矩阵,
采用cholesky分解来解(分解为三角阵的内积)
A.flat[::n_features + 1]实现了对对角元素的简写。
def _solve_cholesky_kernel(K, y, alpha, sample_weight = None, copy = False): n_samples = K.shape[0] n_targets = y.shape[1] if copy: K = K.copy() alpha = np.atleast_1d(alpha) one_alpha = (alpha == alpha[0]).all() has_sw = isinstance(sample_weight, np.ndarray) or sample_weight not in [1.0, None] if has_sw: sw = np.sqrt(np.atleast_1d(sample_weight)) y = y * sw[:, np.newaxis] K *= np.outer(sw, sw) if one_alpha: K.flat[::n_samples + 1] += alpha[0] try: dual_coef = linalg.solve(K, y, sym_pos = True, overwrite_a = False) except np.linalg.LinAlgError: warning.warn("Singular matrix in solving dual problem. using " "least-squares solution instead.") dual_coef = linalg.lstsq(K, y)[0] K.flat[::n_samples + 1] -= alpha[0] if has_sw: dual_coef *= sw[:,np.newaxis] return dual_coef else: dual_coefs = np.empty([n_targets, n_samples]) for dual_coef, target, current_alpha in zip(dual_coefs, y.T, alpha): K.flat[::n_samples + 1] += current_alpha dual_coef[:] = linalg.solve(K, target, sym_pos = True, overwrite_a = False).ravel() K.flat[::n_samples + 1] -= current_alpha if has_sw: dual_coefs *= sw[np.newaxis, :] return dual_coefs.T
_solve_cholesky_kernel:
该函数允许对样本赋予权重,唯一与_solve_cholesky的不同是其使用的核
矩阵需要给出。
还提供了当解等式linalg.solve失效时使用linalg.lstsq求最小二乘解的方案。
def _solve_svd(X, y, alpha): U, s, Vt = linalg.svd(X, full_matrices = False) idx = s > 1e-15 s_nnz = s[idx][:,np.newaxis] UTy = np.dot(U.T, y) d = np.zeros((s.size, alpha.size)) d[idx] = s_nnz / (s_nnz ** 2 + alpha) d_UT_y = d * UTy return np.dot(Vt.T, d_UT_y).T
_solve_svd:
这里仅仅是将上面_solve_cholesky_kernel的过程先将X进行奇异值分解,
将奇异值推导的岭回归结果求解出来。
这里以1e-15为阈值,砍掉了小的奇异值(与scipy.linalg.pinv求广义逆矩阵
特征值阈值相同)
def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                     max_iter=None, tol=1e-3, verbose=0, random_state=None,
                     return_n_iter=False, return_intercept=False):
    """Unified front-end dispatching ridge regression to a concrete solver.

    Solver selection: ``'auto'`` picks ``'cholesky'`` for dense data or
    when sample weights are given, otherwise ``'sparse_cg'``.  Fitting an
    intercept with sparse ``X`` forces ``'sag'``.  If a Cholesky solve
    fails on a singular matrix, the function falls back to ``'svd'``
    (which does not support sparse input).

    BUG FIXES vs. the transcribed text: ``tol = ie-3`` (undefined name)
    -> ``1e-3``; ``str(y,shape)`` -> ``str(y.shape)``; ``y.reshpe`` ->
    ``y.reshape``; ``warning.warn`` -> ``warnings.warn``; typos in the
    "number of penalties" and "lsqr not available" messages.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
    y : array-like, shape (n_samples,) or (n_samples, n_targets)
    alpha : scalar or array-like of length n_targets
    sample_weight : None, scalar, or 1D array
    solver : {'auto', 'sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag'}
    max_iter, tol, verbose, random_state : solver controls
    return_n_iter, return_intercept : bool
        Select which extras are returned alongside ``coef``.

    Returns
    -------
    coef, optionally followed by n_iter and/or intercept.
    """
    if return_intercept and sparse.issparse(X) and solver != 'sag':
        if solver != 'auto':
            warnings.warn("In Ridge, only 'sag' solver can currently fit "
                          "the intercept when X is sparse. Solver has been "
                          "automatically changed into 'sag'.")
        solver = 'sag'

    # Validate / convert inputs; sag needs specific memory layouts.
    if solver == 'sag':
        X = check_array(X, accept_sparse=['csr'], dtype=np.float64,
                        order='C')
        y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
    else:
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                        dtype=np.float64)
        y = check_array(y, dtype='numeric', ensure_2d=False)
    check_consistent_length(X, y)

    n_samples, n_features = X.shape

    if y.ndim > 2:
        raise ValueError("Target y has the wrong shape %s" % str(y.shape))

    # Internally always work with 2D y; remember to flatten on return.
    ravel = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
        ravel = True

    n_samples_, n_targets = y.shape

    if n_samples != n_samples_:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (n_samples, n_samples_))

    has_sw = sample_weight is not None

    if solver == 'auto':
        # cholesky for dense or weighted problems, sparse_cg otherwise
        if not sparse.issparse(X) or has_sw:
            solver = 'cholesky'
        else:
            solver = 'sparse_cg'
    elif solver == 'lsqr' and not hasattr(sp_linalg, 'lsqr'):
        warnings.warn("lsqr not available on this machine, falling back "
                      "to sparse_cg")
        solver = 'sparse_cg'

    if has_sw:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")
        if solver != 'sag':
            # sag handles sample weights internally; for the other
            # solvers, rescale X and y by sqrt(sample_weight).
            X, y = _rescale_data(X, y, sample_weight)

    # Broadcast a single alpha to all targets.
    alpha = np.asarray(alpha).ravel()
    if alpha.size not in [1, n_targets]:
        raise ValueError("Number of targets and number of penalties "
                         "do not correspond: %d != %d"
                         % (alpha.size, n_targets))
    if alpha.size == 1 and n_targets > 1:
        alpha = np.repeat(alpha, n_targets)

    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag'):
        raise ValueError('Solver %s not understood' % solver)

    n_iter = None
    if solver == 'sparse_cg':
        coef = _solve_sparse_cg(X, y, alpha, max_iter, tol, verbose)
    elif solver == 'lsqr':
        coef, n_iter = _solve_lsqr(X, y, alpha, max_iter, tol)
    elif solver == 'cholesky':
        if n_features > n_samples:
            # Kernel trick with the (linear) Gram matrix X X'.
            K = safe_sparse_dot(X, X.T, dense_output=True)
            try:
                dual_coef = _solve_cholesky_kernel(K, y, alpha)
                coef = safe_sparse_dot(X.T, dual_coef,
                                       dense_output=True).T
            except linalg.LinAlgError:
                # singular matrix: fall back to SVD below
                solver = 'svd'
        else:
            try:
                coef = _solve_cholesky(X, y, alpha)
            except linalg.LinAlgError:
                solver = 'svd'
    elif solver == 'sag':
        max_squared_sum = row_norms(X, squared=True).max()
        coef = np.empty([y.shape[1], n_features])
        n_iter = np.empty(y.shape[1], dtype=np.int32)
        intercept = np.zeros((y.shape[1],))
        for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
            # the extra slot holds the intercept when requested
            init = {'coef': np.zeros((n_features + int(return_intercept),
                                      1))}
            coef_, n_iter_, _ = sag_solver(
                X, target.ravel(), sample_weight, 'squared', alpha_i,
                max_iter, tol, verbose, random_state, False,
                max_squared_sum, init)
            if return_intercept:
                coef[i] = coef_[:-1]
                intercept[i] = coef_[-1]
            else:
                coef[i] = coef_
            n_iter[i] = n_iter_

        if intercept.shape[0] == 1:
            intercept = intercept[0]
        coef = np.asarray(coef)

    if solver == 'svd':
        if sparse.issparse(X):
            raise TypeError('SVD solver does not support sparse'
                            ' inputs currently')
        coef = _solve_svd(X, y, alpha)

    if ravel:
        # restore 1D shape for single-target input
        coef = coef.ravel()

    if return_n_iter and return_intercept:
        return coef, n_iter, intercept
    elif return_intercept:
        return coef, intercept
    elif return_n_iter:
        return coef, n_iter
    else:
        return coef
ridge_regression:
这仅仅是对上面所提到的具体方法的统一接口。
岭回归当数据阵为sparse且需要拟合截距时仅能用随机平均梯度下降法"sag"求解。
当solver选项为'auto'时,对非稀疏矩阵或有加权的情况,采用cholesky
分解法解,否则使用sparse_cg
n_features > n_samples 在cholesky分解下采用核方法(仅仅是不变的线性核)
从这里可以看到当矩阵为奇异时采用svd奇异值分解求解。
svd不支持稀疏矩阵求解。
相关文章推荐
- js继承 Base类的源码解析
- 用Python从零实现贝叶斯分类器的机器学习的教程
- My Machine Learning
- 机器学习---学习首页 3ff0
- Spark机器学习(一) -- Machine Learning Library (MLlib)
- 反向传播(Backpropagation)算法的数学原理
- 关于SVM的那点破事
- 也谈 机器学习到底有没有用 ?
- #ML-SDN
- TensorFlow人工智能引擎入门教程之九 RNN/LSTM循环神经网络长短期记忆网络使用
- TensorFlow人工智能引擎入门教程之十 最强网络 RSNN深度残差网络 平均准确率96-99%
- TensorFlow人工智能引擎入门教程所有目录
- Tensorflow 杂记
- 重新认识java-LinkedList
- 如何用70行代码实现深度神经网络算法
- 量子计算机编程原理简介 和 机器学习
- 近200篇机器学习&深度学习资料分享(含各种文档,视频,源码等)
- 已经证实提高机器学习模型准确率的八大方法
- 一文读懂机器学习
- 初识机器学习算法有哪些?