对函数 cblas

xiaoxiao2021-02-28 83

计算矩阵乘法的函数之一是 cblas_sgemm，使用单精度实数，另外还有对应双精度实数，单精度复数和双精度复数的函数。在此以 cblas_sgemm为例。

函数定义为：

void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,

const enum CBLAS_TRANSPOSE TransB, const int M, const int N,

const int K, const float alpha, const float *A,

const int lda, const float *B, const int ldb,

const float beta, float *C, const int ldc)

得到的结果是:

C = alpha*op( A )*op( B ) + beta*C

const enum CBLAS_ORDER Order，这是指的数据的存储形式，在CBLAS的函数中无论一维还是二维数据都是用一维数组存储，这就要涉及是行主序还是列主序，在C语言中数组是用行主序，fortran中是列主序。我还是习惯于是用行主序，所以这个参数是用CblasRowMajor，如果是列主序的话就是 CblasColMajor。

const int M，矩阵A的行，矩阵C的行

const int N，矩阵B的列，矩阵C的列

const int K，矩阵A的列，矩阵B的行

const float alpha， const float beta，计算公式中的两个参数值，如果只是计算C=A*B，则alpha=1,beta=0

const float *A， const float *B， const float *C，矩阵ABC的数据

const int lda， const int ldb， const int ldc，在BLAS的文档里，这三个参数分别为ABC的行数，但是实际使用发现，在CBLAS里应该是列数。

我在这里计算两个简单矩阵的乘法。

1,2,3

4,5,6

7,8,9

8,7,6

5,4

3,2

1,0

程序代码：

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 //因为程序是C++，而CBLAS是C语言写的，所以在此处用extern关键字 extern "C" { #include } #include using namespace std; int main(void) { const enum CBLAS_ORDER Order=CblasRowMajor; const enum CBLAS_TRANSPOSE TransA=CblasNoTrans; const enum CBLAS_TRANSPOSE TransB=CblasNoTrans; const int M=4;//A的行数，C的行数 const int N=2;//B的列数，C的列数 const int K=3;//A的列数，B的行数 const float alpha=1; const float beta=0; const int lda=K;//A的列 const int ldb=N;//B的列 const int ldc=N;//C的列 const float A[K*M]={1,2,3,4,5,6,7,8,9,8,7,6}; const float B[K*N]={5,4,3,2,1,0}; float C[M*N]; cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); for(int i=0;i<</FONT>M;i++) { for(int j=0;j<</FONT>N;j++) { cout<<C[i*N+j]<<"\t"; } cout<<endl; } return EXIT_SUCCESS; }

#include <stdio.h> #include <string.h> #include <ctype.h> #include <stdarg.h> #include <math.h> #include "mkl_cblas.h" //#include <stdlib.h> #include "mkl.h" #define MAX_GRP_COUNT 5 int main() { float *A, *B, *C; int m, n, k, i, j; float alpha, beta; printf("\n This example computes real matrix C=alpha*A*B+beta*C using \n" " Intel(R) MKL function dgemm, where A, B, and C are matrices and \n" " alpha and beta are double precision scalars\n\n"); //m = 2000, k = 200, n = 1000; m = 2, k = 4, n = 3; printf(" Initializing data for matrix multiplication C=A*B for matrix \n" " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n); alpha = 1.0; beta = 0.0; printf(" Allocating memory for matrices aligned on 64-byte boundary for better \n" " performance \n\n"); A = (float *)mkl_malloc(m*k*sizeof(float), 64); B = (float *)mkl_malloc(k*n*sizeof(float), 64); C = (float *)mkl_malloc(m*n*sizeof(float), 64); if (A == NULL || B == NULL || C == NULL) { printf("\n ERROR: Can't allocate memory for matrices. Aborting... \n\n"); mkl_free(A); mkl_free(B); mkl_free(C); return 1; } printf(" Intializing matrix data \n\n"); for (i = 0; i < (m*k); i++) { A[i] = (float)(i + 1); } for (i = 0; i < (k*n); i++) { B[i] = (float)(-i - 1); } for (i = 0; i < (m*n); i++) { C[i] = 0.0; } printf(" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n"); cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k, alpha, A, k, B, k, beta, C, n); printf("\n Computations completed.\n\n"); printf(" Top left corner of matrix A: \n"); for (i = 0; i<min(m, 6); i++) { for (j = 0; j<min(k, 6); j++) { printf(".0f", A[j + i*k]); } printf("\n"); } printf("\n Top left corner of matrix B: \n"); for (i = 0; i<min(k, 6); i++) { for (j = 0; j<min(n, 6); j++) { printf(".0f", B[j + i*n]); } printf("\n"); } printf("\n Top left corner of matrix C: \n"); for (i = 0; i<min(m, 6); i++) { for (j = 0; j<min(n, 6); j++) { printf(".5G", C[j + i*n]); } printf("\n"); } printf("\n Deallocating memory \n\n"); mkl_free(A); mkl_free(B); mkl_free(C); printf(" Example completed. \n\n"); return 0; }

转载请注明原文地址: https://www.6miu.com/read-80454.html

技术

最新回复(0)