1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 51 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 52 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55 56 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57 * simply involves destroying the MKL sparse matrix handle and then freeing 58 * the spptr pointer. */ 59 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 60 if (aijmkl->sparse_optimized) { 61 sparse_status_t stat; 62 stat = mkl_sparse_destroy(aijmkl->csrA); 63 if (stat != SPARSE_STATUS_SUCCESS) { 64 PetscFunctionReturn(PETSC_ERR_LIB); 65 } 66 } 67 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 68 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 69 70 /* Change the type of B to MATSEQAIJ. */ 71 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 72 73 *newmat = B; 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 80 { 81 PetscErrorCode ierr; 82 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 83 84 PetscFunctionBegin; 85 86 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87 * spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) { 95 PetscFunctionReturn(PETSC_ERR_LIB); 96 } 97 } 98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 99 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100 } 101 102 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 103 * to destroy everything that remains. */ 104 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 105 /* Note that I don't call MatSetType(). I believe this is because that 106 * is only to be called when *building* a matrix. I could be wrong, but 107 * that is how things work for the SuperLU matrix class. */ 108 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 #undef __FUNCT__ 113 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 114 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 115 { 116 PetscErrorCode ierr; 117 Mat_SeqAIJMKL *aijmkl; 118 Mat_SeqAIJMKL *aijmkl_dest; 119 120 PetscFunctionBegin; 121 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 122 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 123 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 124 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 125 aijmkl_dest->sparse_optimized = PETSC_FALSE; 126 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 127 aijmkl_dest->csrA = NULL; 128 if (!aijmkl->no_SpMV2) { 129 sparse_status_t stat; 130 stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA); 131 stat = mkl_sparse_optimize(aijmkl_dest->csrA); 132 if (stat != SPARSE_STATUS_SUCCESS) { 133 PetscFunctionReturn(PETSC_ERR_LIB); 134 } 135 aijmkl_dest->sparse_optimized = PETSC_TRUE; 136 } 137 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 143 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 144 { 145 PetscErrorCode ierr; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 Mat_SeqAIJMKL *aijmkl; 148 149 MatScalar *aa; 150 PetscInt n; 151 PetscInt *aj,*ai; 152 153 PetscFunctionBegin; 154 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 155 156 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 157 * extra information and some different methods, call the AssemblyEnd 158 * routine for a MATSEQAIJ. 159 * I'm not sure if this is the best way to do this, but it avoids 160 * a lot of code duplication. 161 * I also note that currently MATSEQAIJMKL doesn't know anything about 162 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 163 * are many zero rows. If the SeqAIJ assembly end routine decides to use 164 * this, this may break things. (Don't know... haven't looked at it. 165 * Do I need to disable this somehow?) */ 166 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 167 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 168 169 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 170 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 171 if (!aijmkl->no_SpMV2) { 172 sparse_status_t stat; 173 if (aijmkl->sparse_optimized) { 174 /* Matrix has been previously assembled and optimized. Must destroy old 175 * matrix handle before running the optimization step again. */ 176 sparse_status_t stat; 177 stat = mkl_sparse_destroy(aijmkl->csrA); 178 if (stat != SPARSE_STATUS_SUCCESS) { 179 PetscFunctionReturn(PETSC_ERR_LIB); 180 } 181 } 182 /* Now perform the SpMV2 setup and matrix optimization. */ 183 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 184 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 185 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 186 n = A->rmap->n; 187 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 188 aa = a->a; /* Nonzero elements stored row-by-row. */ 189 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 190 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 191 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 192 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 193 stat = mkl_sparse_optimize(aijmkl->csrA); 194 if (stat != SPARSE_STATUS_SUCCESS) { 195 PetscFunctionReturn(PETSC_ERR_LIB); 196 } 197 aijmkl->sparse_optimized = PETSC_TRUE; 198 } 199 #endif 200 201 PetscFunctionReturn(0); 202 } 203 204 #undef __FUNCT__ 205 #define __FUNCT__ "MatMult_SeqAIJMKL" 206 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 207 { 208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 209 const PetscScalar *x; 210 PetscScalar *y; 211 const MatScalar *aa; 212 PetscErrorCode ierr; 213 PetscInt m=A->rmap->n; 214 const PetscInt *aj,*ai; 215 216 /* Variables not in MatMult_SeqAIJ. */ 217 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 218 219 PetscFunctionBegin; 220 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 221 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 222 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 223 aa = a->a; /* Nonzero elements stored row-by-row. */ 224 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 225 226 /* Call MKL sparse BLAS routine to do the MatMult. */ 227 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 228 229 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 230 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 231 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 238 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 239 { 240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 241 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 242 const PetscScalar *x; 243 PetscScalar *y; 244 PetscErrorCode ierr; 245 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 246 247 PetscFunctionBegin; 248 249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251 252 /* Call MKL SpMV2 executor routine to do the MatMult. */ 253 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 254 255 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 256 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 257 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 258 if (stat != SPARSE_STATUS_SUCCESS) { 259 PetscFunctionReturn(PETSC_ERR_LIB); 260 } 261 PetscFunctionReturn(0); 262 } 263 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 267 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 268 { 269 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 270 const PetscScalar *x; 271 PetscScalar *y; 272 const MatScalar *aa; 273 PetscErrorCode ierr; 274 PetscInt m=A->rmap->n; 275 const PetscInt *aj,*ai; 276 277 /* Variables not in MatMultTranspose_SeqAIJ. */ 278 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 279 280 PetscFunctionBegin; 281 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 282 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 283 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 284 aa = a->a; /* Nonzero elements stored row-by-row. */ 285 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 286 287 /* Call MKL sparse BLAS routine to do the MatMult. */ 288 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 289 290 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 291 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 292 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 299 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 300 { 301 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 302 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 303 const PetscScalar *x; 304 PetscScalar *y; 305 PetscErrorCode ierr; 306 sparse_status_t stat; 307 308 PetscFunctionBegin; 309 310 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 311 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 312 313 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 314 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 315 316 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 317 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 319 if (stat != SPARSE_STATUS_SUCCESS) { 320 PetscFunctionReturn(PETSC_ERR_LIB); 321 } 322 PetscFunctionReturn(0); 323 } 324 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 325 326 #undef __FUNCT__ 327 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 328 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 329 { 330 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 331 const PetscScalar *x; 332 PetscScalar *y,*z; 333 const MatScalar *aa; 334 PetscErrorCode ierr; 335 PetscInt m=A->rmap->n; 336 const PetscInt *aj,*ai; 337 PetscInt i; 338 339 /* Variables not in MatMultAdd_SeqAIJ. */ 340 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 341 PetscScalar alpha = 1.0; 342 PetscScalar beta = 1.0; 343 char matdescra[6]; 344 345 PetscFunctionBegin; 346 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 347 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 348 349 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 350 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 351 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 352 aa = a->a; /* Nonzero elements stored row-by-row. */ 353 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 354 355 /* Call MKL sparse BLAS routine to do the MatMult. */ 356 if (zz == yy) { 357 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 358 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 359 } else { 360 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 361 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 362 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 363 for (i=0; i<m; i++) { 364 z[i] += y[i]; 365 } 366 } 367 368 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 369 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 370 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 371 PetscFunctionReturn(0); 372 } 373 374 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 377 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 378 { 379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 380 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 381 const PetscScalar *x; 382 PetscScalar *y,*z; 383 PetscErrorCode ierr; 384 PetscInt m=A->rmap->n; 385 PetscInt i; 386 387 /* Variables not in MatMultAdd_SeqAIJ. */ 388 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 389 390 PetscFunctionBegin; 391 392 393 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 394 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 395 396 /* Call MKL sparse BLAS routine to do the MatMult. */ 397 if (zz == yy) { 398 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 399 * with alpha and beta both set to 1.0. */ 400 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 401 } else { 402 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 403 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 404 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 405 for (i=0; i<m; i++) { 406 z[i] += y[i]; 407 } 408 } 409 410 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 412 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 413 if (stat != SPARSE_STATUS_SUCCESS) { 414 PetscFunctionReturn(PETSC_ERR_LIB); 415 } 416 PetscFunctionReturn(0); 417 } 418 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 422 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 423 { 424 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 425 const PetscScalar *x; 426 PetscScalar *y,*z; 427 const MatScalar *aa; 428 PetscErrorCode ierr; 429 PetscInt m=A->rmap->n; 430 const PetscInt *aj,*ai; 431 PetscInt i; 432 433 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 434 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 435 PetscScalar alpha = 1.0; 436 PetscScalar beta = 1.0; 437 char matdescra[6]; 438 439 PetscFunctionBegin; 440 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 441 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 442 443 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 444 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 445 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 446 aa = a->a; /* Nonzero elements stored row-by-row. */ 447 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 448 449 /* Call MKL sparse BLAS routine to do the MatMult. */ 450 if (zz == yy) { 451 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 452 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 453 } else { 454 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 455 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 456 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 457 for (i=0; i<m; i++) { 458 z[i] += y[i]; 459 } 460 } 461 462 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 463 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 464 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 471 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 472 { 473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 474 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 475 const PetscScalar *x; 476 PetscScalar *y,*z; 477 PetscErrorCode ierr; 478 PetscInt m=A->rmap->n; 479 PetscInt i; 480 481 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 482 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 483 484 PetscFunctionBegin; 485 486 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 488 489 /* Call MKL sparse BLAS routine to do the MatMult. */ 490 if (zz == yy) { 491 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 492 * with alpha and beta both set to 1.0. */ 493 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 494 } else { 495 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 496 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 497 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 498 for (i=0; i<m; i++) { 499 z[i] += y[i]; 500 } 501 } 502 503 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 505 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 506 if (stat != SPARSE_STATUS_SUCCESS) { 507 PetscFunctionReturn(PETSC_ERR_LIB); 508 } 509 PetscFunctionReturn(0); 510 } 511 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 512 513 514 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 515 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 516 * routine, but can also be used to convert an assembled SeqAIJ matrix 517 * into a SeqAIJMKL one. */ 518 #undef __FUNCT__ 519 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 520 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 521 { 522 PetscErrorCode ierr; 523 Mat B = *newmat; 524 Mat_SeqAIJMKL *aijmkl; 525 PetscBool set; 526 PetscBool sametype; 527 528 PetscFunctionBegin; 529 if (reuse == MAT_INITIAL_MATRIX) { 530 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 531 } 532 533 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 534 if (sametype) PetscFunctionReturn(0); 535 536 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 537 B->spptr = (void*) aijmkl; 538 539 /* Set function pointers for methods that we inherit from AIJ but override. 540 * We also parse some command line options below, since those determine some of the methods we point to. 541 * Note: Currently the transposed operations are not being set because I encounter memory corruption 542 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 543 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 544 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 545 B->ops->destroy = MatDestroy_SeqAIJMKL; 546 547 aijmkl->sparse_optimized = PETSC_FALSE; 548 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 549 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 550 #elif 551 aijmkl->no_SpMV2 = PETSC_TRUE; 552 #endif 553 554 /* Parse command line options. */ 555 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 556 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 557 ierr = PetscOptionsEnd();CHKERRQ(ierr); 558 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 559 if(!aijmkl->no_SpMV2) { 560 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 561 aijmkl->no_SpMV2 = PETSC_TRUE; 562 } 563 #endif 564 565 if(!aijmkl->no_SpMV2) { 566 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 567 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 568 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 569 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 570 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 571 #endif 572 } else { 573 B->ops->mult = MatMult_SeqAIJMKL; 574 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 575 B->ops->multadd = MatMultAdd_SeqAIJMKL; 576 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 577 } 578 579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 583 584 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 585 *newmat = B; 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatCreateSeqAIJMKL" 591 /*@C 592 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 593 This type inherits from AIJ and is largely identical, but uses sparse BLAS 594 routines from Intel MKL whenever possible. 595 Collective on MPI_Comm 596 597 Input Parameters: 598 + comm - MPI communicator, set to PETSC_COMM_SELF 599 . m - number of rows 600 . n - number of columns 601 . nz - number of nonzeros per row (same for all rows) 602 - nnz - array containing the number of nonzeros in the various rows 603 (possibly different for each row) or NULL 604 605 Output Parameter: 606 . A - the matrix 607 608 Notes: 609 If nnz is given then nz is ignored 610 611 Level: intermediate 612 613 .keywords: matrix, cray, sparse, parallel 614 615 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 616 @*/ 617 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 618 { 619 PetscErrorCode ierr; 620 621 PetscFunctionBegin; 622 ierr = MatCreate(comm,A);CHKERRQ(ierr); 623 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 624 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 625 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatCreate_SeqAIJMKL" 631 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 632 { 633 PetscErrorCode ierr; 634 635 PetscFunctionBegin; 636 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 637 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640