1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 38 } 39 40 /* Reset the original function pointers. */ 41 B->ops->duplicate = MatDuplicate_SeqAIJ; 42 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 43 B->ops->destroy = MatDestroy_SeqAIJ; 44 B->ops->mult = MatMult_SeqAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 46 B->ops->multadd = MatMultAdd_SeqAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 48 49 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 50 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 51 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 52 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 53 54 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 55 * simply involves destroying the MKL sparse matrix handle and then freeing 56 * the spptr pointer. */ 57 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 58 if (aijmkl->sparse_optimized) { 59 sparse_status_t stat; 60 stat = mkl_sparse_destroy(aijmkl->csrA); 61 if (stat != SPARSE_STATUS_SUCCESS) { 62 PetscFunctionReturn(PETSC_ERR_LIB); 63 } 64 } 65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 66 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 67 68 /* Change the type of B to MATSEQAIJ. */ 69 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 70 71 *newmat = B; 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 76 { 77 PetscErrorCode ierr; 78 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 79 80 PetscFunctionBegin; 81 82 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 83 * spptr pointer. */ 84 if (aijmkl) { 85 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 86 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 87 if (aijmkl->sparse_optimized) { 88 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 89 stat = mkl_sparse_destroy(aijmkl->csrA); 90 if (stat != SPARSE_STATUS_SUCCESS) { 91 PetscFunctionReturn(PETSC_ERR_LIB); 92 } 93 } 94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 95 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 96 } 97 98 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 99 * to destroy everything that remains. */ 100 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 101 /* Note that I don't call MatSetType(). I believe this is because that 102 * is only to be called when *building* a matrix. I could be wrong, but 103 * that is how things work for the SuperLU matrix class. */ 104 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 109 { 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 111 Mat_SeqAIJMKL *aijmkl; 112 PetscInt m,n; 113 MatScalar *aa; 114 PetscInt *aj,*ai; 115 116 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 117 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 118 * does nothing. We make it callable anyway in this case because it cuts 119 * down on littering the code with #ifdefs. */ 120 PetscFunctionReturn(0); 121 #else 122 123 sparse_status_t stat; 124 125 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 126 127 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 128 129 if (aijmkl->sparse_optimized) { 130 /* Matrix has been previously assembled and optimized. Must destroy old 131 * matrix handle before running the optimization step again. */ 132 stat = mkl_sparse_destroy(aijmkl->csrA); 133 if (stat != SPARSE_STATUS_SUCCESS) { 134 PetscFunctionReturn(PETSC_ERR_LIB); 135 } 136 } 137 138 /* Now perform the SpMV2 setup and matrix optimization. */ 139 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 140 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 141 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 142 m = A->rmap->n; 143 n = A->cmap->n; 144 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 145 aa = a->a; /* Nonzero elements stored row-by-row. */ 146 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 147 if (n>0) { 148 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 149 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 150 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 151 stat = mkl_sparse_optimize(aijmkl->csrA); 152 if (stat != SPARSE_STATUS_SUCCESS) { 153 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 154 PetscFunctionReturn(PETSC_ERR_LIB); 155 } 156 aijmkl->sparse_optimized = PETSC_TRUE; 157 } 158 159 PetscFunctionReturn(0); 160 #endif 161 } 162 163 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 164 { 165 PetscErrorCode ierr; 166 Mat_SeqAIJMKL *aijmkl; 167 Mat_SeqAIJMKL *aijmkl_dest; 168 169 PetscFunctionBegin; 170 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 171 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 172 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 173 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 174 aijmkl_dest->sparse_optimized = PETSC_FALSE; 175 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 180 { 181 PetscErrorCode ierr; 182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 183 184 PetscFunctionBegin; 185 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 186 187 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 188 * extra information and some different methods, call the AssemblyEnd 189 * routine for a MATSEQAIJ. 190 * I'm not sure if this is the best way to do this, but it avoids 191 * a lot of code duplication. 192 * I also note that currently MATSEQAIJMKL doesn't know anything about 193 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 194 * are many zero rows. If the SeqAIJ assembly end routine decides to use 195 * this, this may break things. (Don't know... haven't looked at it. 196 * Do I need to disable this somehow?) */ 197 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 198 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 199 200 /* Now create the MKL sparse handle (if needed; the function checks). */ 201 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 202 203 PetscFunctionReturn(0); 204 } 205 206 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 207 { 208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 209 const PetscScalar *x; 210 PetscScalar *y; 211 const MatScalar *aa; 212 PetscErrorCode ierr; 213 PetscInt m=A->rmap->n; 214 PetscInt n=A->cmap->n; 215 PetscScalar alpha = 1.0; 216 PetscScalar beta = 0.0; 217 const PetscInt *aj,*ai; 218 char matdescra[6]; 219 220 221 /* Variables not in MatMult_SeqAIJ. */ 222 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 223 224 PetscFunctionBegin; 225 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 226 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 227 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 228 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 229 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 230 aa = a->a; /* Nonzero elements stored row-by-row. */ 231 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 232 233 /* Call MKL sparse BLAS routine to do the MatMult. */ 234 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 235 236 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 238 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 243 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 244 { 245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 246 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 247 const PetscScalar *x; 248 PetscScalar *y; 249 PetscErrorCode ierr; 250 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 251 252 PetscFunctionBegin; 253 254 /* If there are no rows, this is a no-op: return immediately. */ 255 if(A->cmap->n < 1) PetscFunctionReturn(0); 256 257 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 258 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 259 260 /* Call MKL SpMV2 executor routine to do the MatMult. */ 261 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 262 263 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 264 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 265 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 266 if (stat != SPARSE_STATUS_SUCCESS) { 267 PetscFunctionReturn(PETSC_ERR_LIB); 268 } 269 PetscFunctionReturn(0); 270 } 271 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 272 273 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 274 { 275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 276 const PetscScalar *x; 277 PetscScalar *y; 278 const MatScalar *aa; 279 PetscErrorCode ierr; 280 PetscInt m=A->rmap->n; 281 PetscInt n=A->cmap->n; 282 PetscScalar alpha = 1.0; 283 PetscScalar beta = 0.0; 284 const PetscInt *aj,*ai; 285 char matdescra[6]; 286 287 /* Variables not in MatMultTranspose_SeqAIJ. */ 288 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 289 290 PetscFunctionBegin; 291 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 292 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 293 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 294 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 295 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 296 aa = a->a; /* Nonzero elements stored row-by-row. */ 297 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 298 299 /* Call MKL sparse BLAS routine to do the MatMult. */ 300 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 301 302 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 303 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 309 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 310 { 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 312 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 313 const PetscScalar *x; 314 PetscScalar *y; 315 PetscErrorCode ierr; 316 sparse_status_t stat; 317 318 PetscFunctionBegin; 319 320 /* If there are no rows, this is a no-op: return immediately. */ 321 if(A->cmap->n < 1) PetscFunctionReturn(0); 322 323 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 324 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 325 326 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 327 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 328 329 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 330 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 331 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 332 if (stat != SPARSE_STATUS_SUCCESS) { 333 PetscFunctionReturn(PETSC_ERR_LIB); 334 } 335 PetscFunctionReturn(0); 336 } 337 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 338 339 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 340 { 341 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 342 const PetscScalar *x; 343 PetscScalar *y,*z; 344 const MatScalar *aa; 345 PetscErrorCode ierr; 346 PetscInt m=A->rmap->n; 347 PetscInt n=A->cmap->n; 348 const PetscInt *aj,*ai; 349 PetscInt i; 350 351 /* Variables not in MatMultAdd_SeqAIJ. */ 352 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 353 PetscScalar alpha = 1.0; 354 PetscScalar beta; 355 char matdescra[6]; 356 357 PetscFunctionBegin; 358 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 359 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 360 361 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 362 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 363 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 364 aa = a->a; /* Nonzero elements stored row-by-row. */ 365 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 366 367 /* Call MKL sparse BLAS routine to do the MatMult. */ 368 if (zz == yy) { 369 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 370 beta = 1.0; 371 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 372 } else { 373 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 374 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 375 beta = 0.0; 376 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 377 for (i=0; i<m; i++) { 378 z[i] += y[i]; 379 } 380 } 381 382 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 383 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 384 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 389 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 390 { 391 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 392 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 393 const PetscScalar *x; 394 PetscScalar *y,*z; 395 PetscErrorCode ierr; 396 PetscInt m=A->rmap->n; 397 PetscInt i; 398 399 /* Variables not in MatMultAdd_SeqAIJ. */ 400 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 401 402 PetscFunctionBegin; 403 404 /* If there are no rows, this is a no-op: return immediately. */ 405 if(A->cmap->n < 1) PetscFunctionReturn(0); 406 407 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 408 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 409 410 /* Call MKL sparse BLAS routine to do the MatMult. */ 411 if (zz == yy) { 412 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 413 * with alpha and beta both set to 1.0. */ 414 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 415 } else { 416 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 417 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 418 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 419 for (i=0; i<m; i++) { 420 z[i] += y[i]; 421 } 422 } 423 424 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 426 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 427 if (stat != SPARSE_STATUS_SUCCESS) { 428 PetscFunctionReturn(PETSC_ERR_LIB); 429 } 430 PetscFunctionReturn(0); 431 } 432 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 433 434 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 435 { 436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 437 const PetscScalar *x; 438 PetscScalar *y,*z; 439 const MatScalar *aa; 440 PetscErrorCode ierr; 441 PetscInt m=A->rmap->n; 442 PetscInt n=A->cmap->n; 443 const PetscInt *aj,*ai; 444 PetscInt i; 445 446 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 447 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 448 PetscScalar alpha = 1.0; 449 PetscScalar beta; 450 char matdescra[6]; 451 452 PetscFunctionBegin; 453 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 454 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 455 456 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 457 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 458 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 459 aa = a->a; /* Nonzero elements stored row-by-row. */ 460 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 461 462 /* Call MKL sparse BLAS routine to do the MatMult. */ 463 if (zz == yy) { 464 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 465 beta = 1.0; 466 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 467 } else { 468 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 469 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 470 beta = 0.0; 471 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 472 for (i=0; i<n; i++) { 473 z[i] += y[i]; 474 } 475 } 476 477 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 479 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 484 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 487 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 488 const PetscScalar *x; 489 PetscScalar *y,*z; 490 PetscErrorCode ierr; 491 PetscInt n=A->cmap->n; 492 PetscInt i; 493 494 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 495 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 496 497 PetscFunctionBegin; 498 499 /* If there are no rows, this is a no-op: return immediately. */ 500 if(A->cmap->n < 1) PetscFunctionReturn(0); 501 502 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 503 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 504 505 /* Call MKL sparse BLAS routine to do the MatMult. */ 506 if (zz == yy) { 507 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 508 * with alpha and beta both set to 1.0. */ 509 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 510 } else { 511 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 512 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 513 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 514 for (i=0; i<n; i++) { 515 z[i] += y[i]; 516 } 517 } 518 519 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 520 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 521 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 522 if (stat != SPARSE_STATUS_SUCCESS) { 523 PetscFunctionReturn(PETSC_ERR_LIB); 524 } 525 PetscFunctionReturn(0); 526 } 527 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 528 529 PETSC_INTERN PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 530 { 531 PetscErrorCode ierr; 532 533 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 534 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 539 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 540 * routine, but can also be used to convert an assembled SeqAIJ matrix 541 * into a SeqAIJMKL one. */ 542 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 543 { 544 PetscErrorCode ierr; 545 Mat B = *newmat; 546 Mat_SeqAIJMKL *aijmkl; 547 PetscBool set; 548 PetscBool sametype; 549 550 PetscFunctionBegin; 551 if (reuse == MAT_INITIAL_MATRIX) { 552 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 553 } 554 555 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 556 if (sametype) PetscFunctionReturn(0); 557 558 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 559 B->spptr = (void*) aijmkl; 560 561 /* Set function pointers for methods that we inherit from AIJ but override. 562 * We also parse some command line options below, since those determine some of the methods we point to. */ 563 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 564 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 565 B->ops->destroy = MatDestroy_SeqAIJMKL; 566 567 aijmkl->sparse_optimized = PETSC_FALSE; 568 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 569 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 570 #elif 571 aijmkl->no_SpMV2 = PETSC_TRUE; 572 #endif 573 574 /* Parse command line options. */ 575 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 576 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 577 ierr = PetscOptionsEnd();CHKERRQ(ierr); 578 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 579 if(!aijmkl->no_SpMV2) { 580 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 581 aijmkl->no_SpMV2 = PETSC_TRUE; 582 } 583 #endif 584 585 if(!aijmkl->no_SpMV2) { 586 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 587 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 588 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 589 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 590 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 591 #endif 592 } else { 593 B->ops->mult = MatMult_SeqAIJMKL; 594 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 595 B->ops->multadd = MatMultAdd_SeqAIJMKL; 596 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 597 } 598 599 B->ops->scale = MatScale_SeqAIJMKL; 600 601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 606 607 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 608 *newmat = B; 609 PetscFunctionReturn(0); 610 } 611 612 /*@C 613 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 614 This type inherits from AIJ and is largely identical, but uses sparse BLAS 615 routines from Intel MKL whenever possible. 616 Collective on MPI_Comm 617 618 Input Parameters: 619 + comm - MPI communicator, set to PETSC_COMM_SELF 620 . m - number of rows 621 . n - number of columns 622 . nz - number of nonzeros per row (same for all rows) 623 - nnz - array containing the number of nonzeros in the various rows 624 (possibly different for each row) or NULL 625 626 Output Parameter: 627 . A - the matrix 628 629 Notes: 630 If nnz is given then nz is ignored 631 632 Level: intermediate 633 634 .keywords: matrix, cray, sparse, parallel 635 636 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 637 @*/ 638 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 639 { 640 PetscErrorCode ierr; 641 642 PetscFunctionBegin; 643 ierr = MatCreate(comm,A);CHKERRQ(ierr); 644 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 645 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 646 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 651 { 652 PetscErrorCode ierr; 653 654 PetscFunctionBegin; 655 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 656 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659