1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 51 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 52 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55 56 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57 * simply involves destroying the MKL sparse matrix handle and then freeing 58 * the spptr pointer. */ 59 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 60 if (aijmkl->sparse_optimized) { 61 sparse_status_t stat; 62 stat = mkl_sparse_destroy(aijmkl->csrA); 63 if (stat != SPARSE_STATUS_SUCCESS) { 64 PetscFunctionReturn(PETSC_ERR_LIB); 65 } 66 } 67 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 68 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 69 70 /* Change the type of B to MATSEQAIJ. */ 71 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 72 73 *newmat = B; 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 80 { 81 PetscErrorCode ierr; 82 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 83 84 PetscFunctionBegin; 85 86 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87 * spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) { 95 PetscFunctionReturn(PETSC_ERR_LIB); 96 } 97 } 98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 99 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100 } 101 102 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 103 * to destroy everything that remains. */ 104 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 105 /* Note that I don't call MatSetType(). I believe this is because that 106 * is only to be called when *building* a matrix. I could be wrong, but 107 * that is how things work for the SuperLU matrix class. */ 108 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 #undef __FUNCT__ 113 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 114 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 115 { 116 PetscErrorCode ierr; 117 Mat_SeqAIJMKL *aijmkl; 118 Mat_SeqAIJMKL *aijmkl_dest; 119 120 PetscFunctionBegin; 121 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 122 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 123 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 124 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 125 aijmkl_dest->sparse_optimized = PETSC_FALSE; 126 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 127 aijmkl_dest->csrA = NULL; 128 if (!aijmkl->no_SpMV2) { 129 sparse_status_t stat; 130 stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA); 131 if (stat != SPARSE_STATUS_SUCCESS) { 132 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_copy"); 133 PetscFunctionReturn(PETSC_ERR_LIB); 134 } 135 stat = mkl_sparse_optimize(aijmkl_dest->csrA); 136 if (stat != SPARSE_STATUS_SUCCESS) { 137 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 138 PetscFunctionReturn(PETSC_ERR_LIB); 139 } 140 aijmkl_dest->sparse_optimized = PETSC_TRUE; 141 } 142 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 148 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 149 { 150 PetscErrorCode ierr; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 152 Mat_SeqAIJMKL *aijmkl; 153 154 MatScalar *aa; 155 PetscInt n; 156 PetscInt *aj,*ai; 157 158 PetscFunctionBegin; 159 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 160 161 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 162 * extra information and some different methods, call the AssemblyEnd 163 * routine for a MATSEQAIJ. 164 * I'm not sure if this is the best way to do this, but it avoids 165 * a lot of code duplication. 166 * I also note that currently MATSEQAIJMKL doesn't know anything about 167 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 168 * are many zero rows. If the SeqAIJ assembly end routine decides to use 169 * this, this may break things. (Don't know... haven't looked at it. 170 * Do I need to disable this somehow?) */ 171 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 172 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 173 174 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 175 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 176 if (!aijmkl->no_SpMV2) { 177 sparse_status_t stat; 178 if (aijmkl->sparse_optimized) { 179 /* Matrix has been previously assembled and optimized. Must destroy old 180 * matrix handle before running the optimization step again. */ 181 sparse_status_t stat; 182 stat = mkl_sparse_destroy(aijmkl->csrA); 183 if (stat != SPARSE_STATUS_SUCCESS) { 184 PetscFunctionReturn(PETSC_ERR_LIB); 185 } 186 } 187 /* Now perform the SpMV2 setup and matrix optimization. */ 188 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 189 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 190 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 191 n = A->rmap->n; 192 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 193 aa = a->a; /* Nonzero elements stored row-by-row. */ 194 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 195 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 196 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 197 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 198 stat = mkl_sparse_optimize(aijmkl->csrA); 199 if (stat != SPARSE_STATUS_SUCCESS) { 200 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 201 PetscFunctionReturn(PETSC_ERR_LIB); 202 } 203 aijmkl->sparse_optimized = PETSC_TRUE; 204 } 205 #endif 206 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatMult_SeqAIJMKL" 212 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 213 { 214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 215 const PetscScalar *x; 216 PetscScalar *y; 217 const MatScalar *aa; 218 PetscErrorCode ierr; 219 PetscInt m=A->rmap->n; 220 const PetscInt *aj,*ai; 221 222 /* Variables not in MatMult_SeqAIJ. */ 223 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 224 225 PetscFunctionBegin; 226 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 227 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 228 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 229 aa = a->a; /* Nonzero elements stored row-by-row. */ 230 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 231 232 /* Call MKL sparse BLAS routine to do the MatMult. */ 233 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 234 235 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 237 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 238 PetscFunctionReturn(0); 239 } 240 241 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 244 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 245 { 246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 247 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 248 const PetscScalar *x; 249 PetscScalar *y; 250 PetscErrorCode ierr; 251 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 252 253 PetscFunctionBegin; 254 255 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 256 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 257 258 /* Call MKL SpMV2 executor routine to do the MatMult. */ 259 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 260 261 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 262 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 263 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 264 if (stat != SPARSE_STATUS_SUCCESS) { 265 PetscFunctionReturn(PETSC_ERR_LIB); 266 } 267 PetscFunctionReturn(0); 268 } 269 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 273 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 274 { 275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 276 const PetscScalar *x; 277 PetscScalar *y; 278 const MatScalar *aa; 279 PetscErrorCode ierr; 280 PetscInt m=A->rmap->n; 281 const PetscInt *aj,*ai; 282 283 /* Variables not in MatMultTranspose_SeqAIJ. */ 284 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 285 286 PetscFunctionBegin; 287 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 288 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 289 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 290 aa = a->a; /* Nonzero elements stored row-by-row. */ 291 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 292 293 /* Call MKL sparse BLAS routine to do the MatMult. */ 294 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 295 296 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 297 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 298 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 305 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 306 { 307 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 308 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 309 const PetscScalar *x; 310 PetscScalar *y; 311 PetscErrorCode ierr; 312 sparse_status_t stat; 313 314 PetscFunctionBegin; 315 316 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 318 319 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 320 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 321 322 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 323 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 325 if (stat != SPARSE_STATUS_SUCCESS) { 326 PetscFunctionReturn(PETSC_ERR_LIB); 327 } 328 PetscFunctionReturn(0); 329 } 330 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 331 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 334 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 335 { 336 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 337 const PetscScalar *x; 338 PetscScalar *y,*z; 339 const MatScalar *aa; 340 PetscErrorCode ierr; 341 PetscInt m=A->rmap->n; 342 const PetscInt *aj,*ai; 343 PetscInt i; 344 345 /* Variables not in MatMultAdd_SeqAIJ. */ 346 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 347 PetscScalar alpha = 1.0; 348 PetscScalar beta = 1.0; 349 char matdescra[6]; 350 351 PetscFunctionBegin; 352 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 353 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 354 355 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 356 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 357 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 358 aa = a->a; /* Nonzero elements stored row-by-row. */ 359 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 360 361 /* Call MKL sparse BLAS routine to do the MatMult. */ 362 if (zz == yy) { 363 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 364 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 365 } else { 366 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 367 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 368 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 369 for (i=0; i<m; i++) { 370 z[i] += y[i]; 371 } 372 } 373 374 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 375 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 376 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 383 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 384 { 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 386 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 387 const PetscScalar *x; 388 PetscScalar *y,*z; 389 PetscErrorCode ierr; 390 PetscInt m=A->rmap->n; 391 PetscInt i; 392 393 /* Variables not in MatMultAdd_SeqAIJ. */ 394 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 395 396 PetscFunctionBegin; 397 398 399 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 400 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 401 402 /* Call MKL sparse BLAS routine to do the MatMult. */ 403 if (zz == yy) { 404 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 405 * with alpha and beta both set to 1.0. */ 406 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 407 } else { 408 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 409 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 410 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 411 for (i=0; i<m; i++) { 412 z[i] += y[i]; 413 } 414 } 415 416 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 418 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 419 if (stat != SPARSE_STATUS_SUCCESS) { 420 PetscFunctionReturn(PETSC_ERR_LIB); 421 } 422 PetscFunctionReturn(0); 423 } 424 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 428 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 429 { 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431 const PetscScalar *x; 432 PetscScalar *y,*z; 433 const MatScalar *aa; 434 PetscErrorCode ierr; 435 PetscInt m=A->rmap->n; 436 const PetscInt *aj,*ai; 437 PetscInt i; 438 439 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 440 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 441 PetscScalar alpha = 1.0; 442 PetscScalar beta = 1.0; 443 char matdescra[6]; 444 445 PetscFunctionBegin; 446 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 447 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 448 449 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 451 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 452 aa = a->a; /* Nonzero elements stored row-by-row. */ 453 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 454 455 /* Call MKL sparse BLAS routine to do the MatMult. */ 456 if (zz == yy) { 457 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 458 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 459 } else { 460 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 461 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 462 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 463 for (i=0; i<m; i++) { 464 z[i] += y[i]; 465 } 466 } 467 468 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 469 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 470 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 477 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 478 { 479 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 480 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 481 const PetscScalar *x; 482 PetscScalar *y,*z; 483 PetscErrorCode ierr; 484 PetscInt m=A->rmap->n; 485 PetscInt i; 486 487 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 488 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 489 490 PetscFunctionBegin; 491 492 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 493 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 494 495 /* Call MKL sparse BLAS routine to do the MatMult. */ 496 if (zz == yy) { 497 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 498 * with alpha and beta both set to 1.0. */ 499 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 500 } else { 501 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 502 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 503 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 504 for (i=0; i<m; i++) { 505 z[i] += y[i]; 506 } 507 } 508 509 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 511 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 512 if (stat != SPARSE_STATUS_SUCCESS) { 513 PetscFunctionReturn(PETSC_ERR_LIB); 514 } 515 PetscFunctionReturn(0); 516 } 517 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 518 519 520 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 521 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 522 * routine, but can also be used to convert an assembled SeqAIJ matrix 523 * into a SeqAIJMKL one. */ 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 526 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 527 { 528 PetscErrorCode ierr; 529 Mat B = *newmat; 530 Mat_SeqAIJMKL *aijmkl; 531 PetscBool set; 532 PetscBool sametype; 533 534 PetscFunctionBegin; 535 if (reuse == MAT_INITIAL_MATRIX) { 536 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 537 } 538 539 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 540 if (sametype) PetscFunctionReturn(0); 541 542 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 543 B->spptr = (void*) aijmkl; 544 545 /* Set function pointers for methods that we inherit from AIJ but override. 546 * We also parse some command line options below, since those determine some of the methods we point to. 547 * Note: Currently the transposed operations are not being set because I encounter memory corruption 548 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 549 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 550 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 551 B->ops->destroy = MatDestroy_SeqAIJMKL; 552 553 aijmkl->sparse_optimized = PETSC_FALSE; 554 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 555 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 556 #elif 557 aijmkl->no_SpMV2 = PETSC_TRUE; 558 #endif 559 560 /* Parse command line options. */ 561 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 562 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 563 ierr = PetscOptionsEnd();CHKERRQ(ierr); 564 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 565 if(!aijmkl->no_SpMV2) { 566 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 567 aijmkl->no_SpMV2 = PETSC_TRUE; 568 } 569 #endif 570 571 if(!aijmkl->no_SpMV2) { 572 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 573 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 574 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 575 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 576 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 577 #endif 578 } else { 579 B->ops->mult = MatMult_SeqAIJMKL; 580 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 581 B->ops->multadd = MatMultAdd_SeqAIJMKL; 582 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 583 } 584 585 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 589 590 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 591 *newmat = B; 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatCreateSeqAIJMKL" 597 /*@C 598 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 599 This type inherits from AIJ and is largely identical, but uses sparse BLAS 600 routines from Intel MKL whenever possible. 601 Collective on MPI_Comm 602 603 Input Parameters: 604 + comm - MPI communicator, set to PETSC_COMM_SELF 605 . m - number of rows 606 . n - number of columns 607 . nz - number of nonzeros per row (same for all rows) 608 - nnz - array containing the number of nonzeros in the various rows 609 (possibly different for each row) or NULL 610 611 Output Parameter: 612 . A - the matrix 613 614 Notes: 615 If nnz is given then nz is ignored 616 617 Level: intermediate 618 619 .keywords: matrix, cray, sparse, parallel 620 621 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 622 @*/ 623 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 624 { 625 PetscErrorCode ierr; 626 627 PetscFunctionBegin; 628 ierr = MatCreate(comm,A);CHKERRQ(ierr); 629 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 630 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 631 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatCreate_SeqAIJMKL" 637 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 638 { 639 PetscErrorCode ierr; 640 641 PetscFunctionBegin; 642 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 643 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646