1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 /* "Handle" used by SpMV2 inspector-executor routines. */ 17 sparse_matrix_t csrA; 18 } Mat_SeqAIJMKL; 19 20 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 25 { 26 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 27 /* so we will ignore 'MatType type'. */ 28 PetscErrorCode ierr; 29 Mat B = *newmat; 30 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 31 32 PetscFunctionBegin; 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 35 } 36 37 /* Reset the original function pointers. */ 38 B->ops->duplicate = MatDuplicate_SeqAIJ; 39 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 40 B->ops->destroy = MatDestroy_SeqAIJ; 41 B->ops->mult = MatMult_SeqAIJ; 42 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 43 B->ops->multadd = MatMultAdd_SeqAIJ; 44 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 45 46 /* Free everything in the Mat_SeqAIJMKL data structure. 47 * We don't free the Mat_SeqAIJMKL struct itself, as this will 48 * cause problems later when MatDestroy() tries to free it. */ 49 /* Actually there is nothing to do here right now. 50 * When I've added use of the MKL SpMV2 inspector-executor routines, I should 51 * see if there is some way to clean up the "handle" used by SpMV2. */ 52 53 /* Change the type of B to MATSEQAIJ. */ 54 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 55 56 *newmat = B; 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 62 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 63 { 64 PetscErrorCode ierr; 65 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 66 67 PetscFunctionBegin; 68 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 69 mkl_sparse_destroy(aijmkl->csrA); 70 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 71 72 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 73 * to destroy everything that remains. */ 74 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 75 /* Note that I don't call MatSetType(). I believe this is because that 76 * is only to be called when *building* a matrix. I could be wrong, but 77 * that is how things work for the SuperLU matrix class. */ 78 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 84 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 85 { 86 PetscErrorCode ierr; 87 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 88 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 89 90 PetscFunctionBegin; 91 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 92 ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 98 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 99 { 100 PetscErrorCode ierr; 101 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 102 103 PetscFunctionBegin; 104 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 105 106 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 107 * extra information and some different methods, call the AssemblyEnd 108 * routine for a MATSEQAIJ. 109 * I'm not sure if this is the best way to do this, but it avoids 110 * a lot of code duplication. 111 * I also note that currently MATSEQAIJMKL doesn't know anything about 112 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 113 * are many zero rows. If the SeqAIJ assembly end routine decides to use 114 * this, this may break things. (Don't know... haven't looked at it. 115 * Do I need to disable this somehow?) */ 116 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 117 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 118 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatMult_SeqAIJMKL" 124 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 125 { 126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 127 const PetscScalar *x; 128 PetscScalar *y; 129 const MatScalar *aa; 130 PetscErrorCode ierr; 131 PetscInt m=A->rmap->n; 132 const PetscInt *aj,*ai; 133 PetscInt i; 134 135 /* Variables not in MatMult_SeqAIJ. */ 136 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 137 138 PetscFunctionBegin; 139 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 141 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 142 aa = a->a; /* Nonzero elements stored row-by-row. */ 143 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 144 145 /* Call MKL sparse BLAS routine to do the MatMult. */ 146 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 147 148 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 149 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 150 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 #undef __FUNCT__ 155 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 156 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 157 { 158 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 159 const PetscScalar *x; 160 PetscScalar *y; 161 const MatScalar *aa; 162 PetscErrorCode ierr; 163 PetscInt m=A->rmap->n; 164 const PetscInt *aj,*ai; 165 PetscInt i; 166 167 /* Variables not in MatMultTranspose_SeqAIJ. */ 168 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 169 170 PetscFunctionBegin; 171 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 172 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 173 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 174 aa = a->a; /* Nonzero elements stored row-by-row. */ 175 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 176 177 /* Call MKL sparse BLAS routine to do the MatMult. */ 178 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 179 180 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 181 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 182 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 188 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 189 { 190 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 191 const PetscScalar *x; 192 PetscScalar *y,*z; 193 const MatScalar *aa; 194 PetscErrorCode ierr; 195 PetscInt m=A->rmap->n; 196 const PetscInt *aj,*ai; 197 PetscInt i; 198 199 /* Variables not in MatMultAdd_SeqAIJ. */ 200 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 201 PetscScalar alpha = 1.0; 202 PetscScalar beta = 1.0; 203 char matdescra[6]; 204 205 PetscFunctionBegin; 206 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 207 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 208 209 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 210 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 211 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 212 aa = a->a; /* Nonzero elements stored row-by-row. */ 213 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 214 215 /* Call MKL sparse BLAS routine to do the MatMult. */ 216 if (zz == yy) { 217 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 218 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 219 } else { 220 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 221 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 222 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 223 for (i=0; i<m; i++) { 224 z[i] += y[i]; 225 } 226 } 227 228 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 229 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 230 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 236 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 237 { 238 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 239 const PetscScalar *x; 240 PetscScalar *y,*z; 241 const MatScalar *aa; 242 PetscErrorCode ierr; 243 PetscInt m=A->rmap->n; 244 const PetscInt *aj,*ai; 245 PetscInt i; 246 247 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 248 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 249 PetscScalar alpha = 1.0; 250 PetscScalar beta = 1.0; 251 char matdescra[6]; 252 253 PetscFunctionBegin; 254 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 255 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 256 257 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 258 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 259 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 260 aa = a->a; /* Nonzero elements stored row-by-row. */ 261 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 262 263 /* Call MKL sparse BLAS routine to do the MatMult. */ 264 if (zz == yy) { 265 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 266 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 267 } else { 268 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 269 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 270 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 271 for (i=0; i<m; i++) { 272 z[i] += y[i]; 273 } 274 } 275 276 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 277 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 278 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 283 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 284 * routine, but can also be used to convert an assembled SeqAIJ matrix 285 * into a SeqAIJMKL one. */ 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 288 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 289 { 290 PetscErrorCode ierr; 291 Mat B = *newmat; 292 Mat_SeqAIJMKL *aijmkl; 293 294 PetscFunctionBegin; 295 if (reuse == MAT_INITIAL_MATRIX) { 296 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 297 } 298 299 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 300 B->spptr = (void*) aijmkl; 301 302 /* Set function pointers for methods that we inherit from AIJ but override. */ 303 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 304 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 305 B->ops->destroy = MatDestroy_SeqAIJMKL; 306 B->ops->mult = MatMult_SeqAIJMKL; 307 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 308 B->ops->multadd = MatMultAdd_SeqAIJMKL; 309 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 310 311 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 312 313 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 314 *newmat = B; 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatCreateSeqAIJMKL" 320 /*@C 321 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 322 This type inherits from AIJ and is largely identical, but uses sparse BLAS 323 routines from Intel MKL whenever possible. 324 Collective on MPI_Comm 325 326 Input Parameters: 327 + comm - MPI communicator, set to PETSC_COMM_SELF 328 . m - number of rows 329 . n - number of columns 330 . nz - number of nonzeros per row (same for all rows) 331 - nnz - array containing the number of nonzeros in the various rows 332 (possibly different for each row) or NULL 333 334 Output Parameter: 335 . A - the matrix 336 337 Notes: 338 If nnz is given then nz is ignored 339 340 Level: intermediate 341 342 .keywords: matrix, cray, sparse, parallel 343 344 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 345 @*/ 346 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 347 { 348 PetscErrorCode ierr; 349 350 PetscFunctionBegin; 351 ierr = MatCreate(comm,A);CHKERRQ(ierr); 352 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 353 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 354 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreate_SeqAIJMKL" 360 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 361 { 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 366 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 371