您的位置:首页 > 其它

MKL 进行矩阵向量运算

2016-05-22 21:09 447 查看
/* C source code is found in dgemm_example.c */

#define min(x,y) (((x) < (y)) ? (x) : (y))

#include <stdio.h>

#include <stdlib.h>

#include "mkl.h"

int main()

{

float *A, *B, *C;

int m, n, k, i, j;

int l,r;

double s_initial,s_elapsed;

int LOOP_COUNT =100;

float alpha, beta;

double sum;

printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"

" Intel(R) MKL function dgemm, where A, B, and C are matrices and \n"

" alpha and beta are float precision scalars\n\n");

m = 2000, k = 200, n = 1000;

printf (" Initializing data for matrix multiplication C=A*B for matrix \n"

" A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);

alpha = 1.0; beta = 0.0;

printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"

" performance \n\n");

A = (float *)mkl_malloc( m*k*sizeof( float ), 64 );

B = (float *)mkl_malloc( k*n*sizeof( float ), 64 );

C = (float *)mkl_malloc( m*n*sizeof( float ), 64 );

if (A == NULL || B == NULL || C == NULL) {

printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");

mkl_free(A);

mkl_free(B);

mkl_free(C);

return 1;

}

printf (" Intializing matrix data \n\n");

for (i = 0; i < (m*k); i++) {

A[i] = (float)(i+1);

}

for (i = 0; i < (k*n); i++) {

B[i] = (float)(-i-1);

}

for (i = 0; i < (m*n); i++) {

C[i] = 0.0;

}

printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");

cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,

m, n, k, alpha, A, k, B, n, beta, C, n);

printf ("\n Computations completed.\n\n");

printf (" Top left corner of matrix A: \n");

for (i=0; i<min(m,6); i++) {

for (j=0; j<min(k,6); j++) {

printf ("%12.0f", A[j+i*k]);

}

printf ("\n");

}

printf ("\n Top left corner of matrix B: \n");

for (i=0; i<min(k,6); i++) {

for (j=0; j<min(n,6); j++) {

printf ("%12.0f", B[j+i*n]);

}

printf ("\n");

}

printf ("\n Top left corner of matrix C: \n");

for (i=0; i<min(m,6); i++) {

for (j=0; j<min(n,6); j++) {

printf ("%12.5G", C[j+i*n]);

}

printf ("\n");

}

/* C source code is found in dgemm_with_timing.c */

printf (" Making the first run of matrix product using Intel(R) MKL dgemm function \n"

" via CBLAS interface to get stable run time measurements \n\n");

cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,

m, n, k, alpha, A, k, B, n, beta, C, n);

printf (" Measuring performance of matrix product using Intel(R) MKL dgemm function \n"

" via CBLAS interface \n\n");

s_initial = dsecnd();

for (r = 0; r < LOOP_COUNT; r++) {

cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,

m, n, k, alpha, A, k, B, n, beta, C, n);

}

s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;

printf (" == Matrix multiplication using Intel(R) MKL dgemm completed == \n"

" == at %.5f milliseconds == \n\n", (s_elapsed * 1000));

/* C source code is found in matrix_multiplication.c */

printf (" Making the first run of matrix product using triple nested loop\n"

" to get stable run time measurements \n\n");

for (i = 0; i < m; i++) {

for (j = 0; j < n; j++) {

sum = 0.0;

for (l = 0; l < k; l++)

sum += A[k*i+l] * B[n*l+j];

C[n*i+j] = sum;

}

}

printf (" Measuring performance of matrix product using triple nested loop \n\n");

s_initial = dsecnd();

for (r = 0; r < LOOP_COUNT; r++) {

for (i = 0; i < m; i++) {

for (j = 0; j < n; j++) {

sum = 0.0;

for (l = 0; l < k; l++)

sum += A[k*i+l] * B[n*l+j];

C[n*i+j] = sum;

}

}

}

s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;

printf (" == Matrix multiplication using triple nested loop completed == \n"

" == at %.5f milliseconds == \n\n", (s_elapsed * 1000));

printf ("\n Deallocating memory \n\n");

mkl_free(A);

mkl_free(B);

mkl_free(C);

printf (" Example completed. \n\n");

return 0;

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: