矩阵乘法的优化
2013-01-15 17:37
218 查看
起因是微博上一位同学发了一条微博:“如何对C语言下的矩阵乘法进行优化?我试了几种小技巧,但是发现怎么试都远远比不上MATLAB。”
然后他贴了一下他的基本实现方法如下图:
基于他的方法,我也做了一下程序实现,代码如下:
在我机子上实验结果如下:
(未完待续......)
然后他贴了一下他的基本实现方法如下图:
基于他的方法,我也做了一下程序实现,代码如下:
/*
 * =====================================================================================
 *
 *       Filename:  matrix_multi.cpp
 *
 *    Description:  Times several loop orderings of a dense MAXNUM x MAXNUM
 *                  matrix product in which the input matrix is multiplied
 *                  by itself (res = matrix * matrix).
 *
 *        Version:  1.0
 *        Created:  2013-01-15 13:43:36
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Weidong Yang (cn), ywdong@mail2.sysu.edu.cn
 *        Company:
 *
 * =====================================================================================
 */
#include <cstdio>
#include <cstdlib>
#include <ctime>

#define MAXNUM 1024
#define BASE 10
#define BLOCK (MAXNUM / 8)

/* A zero tile size would make the blocked loops below never advance. */
#if BLOCK < 1
#error "MAXNUM must be at least 8 so that BLOCK is non-zero"
#endif

/* Input matrix and result, stored as 2-D arrays. */
double matrix[MAXNUM][MAXNUM];
double res[MAXNUM][MAXNUM];
/* The same input and a second result buffer, stored as flat row-major arrays. */
double matrix_oneD[MAXNUM * MAXNUM];
double res_oneD[MAXNUM * MAXNUM];

char fileName[] = "Matrix_Data.txt";
/* Timing state shared by all MM_* kernels. */
clock_t start, finish;
double duration;
/* When false, PrintRes() and PrintResOneD() return without printing. */
bool isPrint = false;

/* Resets every element of res to 0.0 before a kernel accumulates into it. */
static void ClearRes()
{
    for (int row = 0; row < MAXNUM; row++)
        for (int col = 0; col < MAXNUM; col++)
            res[row][col] = 0.0;
}

/*
 * Returns the exclusive upper bound of the tile that starts at `begin`,
 * clamped to MAXNUM so a trailing partial tile never indexes past the arrays.
 */
static inline int BlockEnd(int begin)
{
    int end = begin + BLOCK;
    return (end < MAXNUM) ? end : MAXNUM;
}

/*
 * Writes MAXNUM * MAXNUM pseudo-random values (rand() / 100000) to fileName,
 * tab-separated, one matrix row per text line.
 * Prints a message and returns if the file cannot be created.
 */
void GenData()
{
    srand((unsigned)time(NULL));
    FILE* fp = fopen(fileName, "w+");
    if (fp == NULL) {
        /* The original fell through to fclose(NULL), which is undefined. */
        printf("Create file %s failed.\n", fileName);
        return;
    }

    for (int i = 0; i < MAXNUM; i++) {
        for (int j = 0; j < MAXNUM; j++) {
            double value = (double)rand() / 100000;
            fprintf(fp, "%lf\t", value);
        }
        fprintf(fp, "\n");
    }

    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(fp) != 0)
        printf("Write file %s failed.\n", fileName);
}

/*
 * Loads up to MAXNUM * MAXNUM values from fileName into both matrix and
 * matrix_oneD, then prints how many values were read.
 * Prints a message and returns if the file cannot be opened. Stops at the
 * first value that fails to parse; entries not reached are left unchanged.
 */
void ReadData()
{
    FILE* fp = fopen(fileName, "r");
    if (fp == NULL) {
        /* Return before fclose: closing a null stream is undefined. */
        printf("Read file %s failed.\n", fileName);
        return;
    }

    int index = 0;
    bool complete = true;
    for (int i = 0; complete && i < MAXNUM; i++) {
        for (int j = 0; j < MAXNUM; j++) {
            double tmp;
            /* Only store tmp when fscanf actually converted a value. */
            if (fscanf(fp, "%lf", &tmp) != 1) {
                complete = false;
                break;
            }
            matrix[i][j] = tmp;
            matrix_oneD[index++] = tmp;
        }
    }

    printf("Data num is %d\n", index);
    if (!complete)
        printf("File %s holds fewer than %d values.\n", fileName, MAXNUM * MAXNUM);
    fclose(fp);
}

/* Baseline triple loop in i-j-k order; stores the product in res and prints the elapsed CPU time. */
void MM_Normal()
{
    int i, j, k;
    ClearRes();

    start = clock();
    for (i = 0; i < MAXNUM; i++)
        for (j = 0; j < MAXNUM; j++)
            for (k = 0; k < MAXNUM; k++)
                res[i][j] += matrix[i][k] * matrix[k][j];
    finish = clock();

    duration = (double)(finish - start) / CLOCKS_PER_SEC;
    printf("Normal method costs %lf seconds.\n", duration);
}

/*
 * Tiled multiplication with BLOCK x BLOCK tiles, visiting tiles and elements
 * in i-j-k order; stores the product in res and prints the elapsed CPU time.
 */
void MM_Block()
{
    int i, j, k, i0, j0, k0;
    ClearRes();

    start = clock();
    for (i = 0; i < MAXNUM; i += BLOCK) {
        const int iEnd = BlockEnd(i);
        for (j = 0; j < MAXNUM; j += BLOCK) {
            const int jEnd = BlockEnd(j);
            for (k = 0; k < MAXNUM; k += BLOCK) {
                const int kEnd = BlockEnd(k);
                for (i0 = i; i0 < iEnd; i0++)
                    for (j0 = j; j0 < jEnd; j0++)
                        for (k0 = k; k0 < kEnd; k0++)
                            res[i0][j0] += matrix[i0][k0] * matrix[k0][j0];
            }
        }
    }
    finish = clock();

    duration = (double)(finish - start) / CLOCKS_PER_SEC;
    printf("Block method costs %lf seconds.\n", duration);
}

/*
 * Triple loop in i-k-j order. Despite the name, no matrix is transposed:
 * only the j and k loops are swapped, so the innermost loop walks row i of
 * res and row k of matrix. Stores the product in res and prints the elapsed
 * CPU time.
 */
void MM_Transpose()
{
    int i, j, k;
    ClearRes();

    start = clock();
    for (i = 0; i < MAXNUM; i++)
        for (k = 0; k < MAXNUM; k++)
            for (j = 0; j < MAXNUM; j++)
                res[i][j] += matrix[i][k] * matrix[k][j];
    finish = clock();

    duration = (double)(finish - start) / CLOCKS_PER_SEC;
    printf("Transpose method costs %lf seconds.\n", duration);
}

/*
 * Same i-k-j loop order as MM_Transpose, but on the flat arrays with manual
 * row * MAXNUM + col indexing; stores the product in res_oneD and prints the
 * elapsed CPU time.
 */
void MM_T_OneDimension()
{
    int i, j, k;
    for (i = 0; i < MAXNUM * MAXNUM; i++)
        res_oneD[i] = 0.0;

    start = clock();
    for (i = 0; i < MAXNUM; i++)
        for (k = 0; k < MAXNUM; k++)
            for (j = 0; j < MAXNUM; j++)
                res_oneD[i * MAXNUM + j] += matrix_oneD[i * MAXNUM + k] * matrix_oneD[k * MAXNUM + j];
    finish = clock();

    duration = (double)(finish - start) / CLOCKS_PER_SEC;
    printf("Transpose OneDimention method costs %lf seconds.\n", duration);
}

/*
 * Tiled multiplication with BLOCK x BLOCK tiles, visiting tiles and elements
 * in i-k-j order; stores the product in res and prints the elapsed CPU time.
 */
void MM_Transpose_Block()
{
    int i, j, k, i0, j0, k0;
    ClearRes();

    start = clock();
    for (i = 0; i < MAXNUM; i += BLOCK) {
        const int iEnd = BlockEnd(i);
        for (k = 0; k < MAXNUM; k += BLOCK) {
            const int kEnd = BlockEnd(k);
            for (j = 0; j < MAXNUM; j += BLOCK) {
                const int jEnd = BlockEnd(j);
                for (i0 = i; i0 < iEnd; i0++)
                    for (k0 = k; k0 < kEnd; k0++)
                        for (j0 = j; j0 < jEnd; j0++)
                            res[i0][j0] += matrix[i0][k0] * matrix[k0][j0];
            }
        }
    }
    finish = clock();

    duration = (double)(finish - start) / CLOCKS_PER_SEC;
    printf("Transpose Block method costs %lf seconds.\n", duration);
}

/* Prints res_oneD as MAXNUM rows of MAXNUM values; does nothing unless isPrint is true. */
void PrintResOneD()
{
    if (!isPrint)
        return;

    int i, j;
    for (i = 0; i < MAXNUM; i++) {
        for (j = 0; j < MAXNUM; j++)
            printf("%lf ", res_oneD[i * MAXNUM + j]);
        printf("\n");
    }
    printf("\n");
}

/* Prints res as MAXNUM rows of MAXNUM values; does nothing unless isPrint is true. */
void PrintRes()
{
    if (!isPrint)
        return;

    int i, j;
    for (i = 0; i < MAXNUM; i++) {
        for (j = 0; j < MAXNUM; j++)
            printf("%lf ", res[i][j]);
        printf("\n");
    }
    printf("\n");
}

/*
 * Loads the input matrix, then runs and times each multiplication variant in
 * turn. The command-line arguments are not used.
 */
int main ( int argc, char *argv[] )
{
    /* Create the data file on first use; nothing else ever calls GenData(). */
    FILE* probe = fopen(fileName, "r");
    if (probe != NULL)
        fclose(probe);
    else
        GenData();

    ReadData();

    MM_Normal();
    PrintRes();

    MM_Transpose();
    PrintRes();

    MM_T_OneDimension();
    PrintResOneD();

    MM_Block();
    PrintRes();

    MM_Transpose_Block();
    PrintRes();

    return 0;
}               // ---------- end of function main ----------
在我机子上实验结果如下:
(未完待续......)
相关文章推荐
- 2014多校第五场1010 || HDU 4920 Matrix multiplication(矩阵乘法优化)
- [BZOJ]1875: [SDOI2009]HH去散步 矩阵乘法优化DP
- CUDA: 矩阵乘法优化
- ARM处理器NEON编程及优化技巧三—矩阵乘法的实例
- 矩阵乘法优化DP
- 【矩阵乘法】【快速幂】【递推】斐波那契数列&&矩乘优化递推模板
- 每日算法--矩阵乘法优化递推
- [矩阵乘法优化DP] Topcoder SRM554. TheBrickTowerHardDivOne
- POJ 3233 - 矩阵乘法及其性质和优化
- BZOJ 2875: [Noi2012]随机数生成器【矩阵乘法优化递推
- fzu 1692 Key problem(循环同构矩阵o(n^2)优化乘法)
- [矩阵乘法特征多项式优化]黄金
- 程序碎片- 矩阵乘法优化(dp,循环)
- 程序性能优化探讨(5)——高速缓存、存储器山与矩阵乘法优化
- GDOI2016模拟3.9 暴走的图灵机 矩阵乘法优化暴力
- Python numpy 矩阵特殊加、乘法与循环优化
- 程序性能优化探讨(6)——矩阵乘法优化之分块矩阵
- BZOJ1009 GT考试 (DP 矩阵乘法优化)
- POJ 3735 BNUOJ 3845 Training little cats 矩阵快速幂 稀疏矩阵乘法优化 计算机图形学的齐次坐标
- 矩阵乘法优化线性递推