1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 38 } 39 40 /* Reset the original function pointers. */ 41 B->ops->duplicate = MatDuplicate_SeqAIJ; 42 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 43 B->ops->destroy = MatDestroy_SeqAIJ; 44 B->ops->mult = MatMult_SeqAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 46 B->ops->multadd = MatMultAdd_SeqAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 48 49 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 50 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 51 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 52 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 53 54 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 55 * simply involves destroying the MKL sparse matrix handle and then freeing 56 * the spptr pointer. */ 57 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 58 if (aijmkl->sparse_optimized) { 59 sparse_status_t stat; 60 stat = mkl_sparse_destroy(aijmkl->csrA); 61 if (stat != SPARSE_STATUS_SUCCESS) { 62 PetscFunctionReturn(PETSC_ERR_LIB); 63 } 64 } 65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 66 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 67 68 /* Change the type of B to MATSEQAIJ. */ 69 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 70 71 *newmat = B; 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 76 { 77 PetscErrorCode ierr; 78 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 79 80 PetscFunctionBegin; 81 82 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 83 * spptr pointer. */ 84 if (aijmkl) { 85 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 86 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 87 if (aijmkl->sparse_optimized) { 88 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 89 stat = mkl_sparse_destroy(aijmkl->csrA); 90 if (stat != SPARSE_STATUS_SUCCESS) { 91 PetscFunctionReturn(PETSC_ERR_LIB); 92 } 93 } 94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 95 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 96 } 97 98 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 99 * to destroy everything that remains. */ 100 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 101 /* Note that I don't call MatSetType(). I believe this is because that 102 * is only to be called when *building* a matrix. I could be wrong, but 103 * that is how things work for the SuperLU matrix class. */ 104 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 109 { 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 111 Mat_SeqAIJMKL *aijmkl; 112 PetscInt m,n; 113 MatScalar *aa; 114 PetscInt *aj,*ai; 115 116 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 117 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 118 * does nothing. We make it callable anyway in this case because it cuts 119 * down on littering the code with #ifdefs. */ 120 PetscFunctionReturn(0); 121 #else 122 123 sparse_status_t stat; 124 125 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 126 127 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 128 129 if (aijmkl->sparse_optimized) { 130 /* Matrix has been previously assembled and optimized. Must destroy old 131 * matrix handle before running the optimization step again. */ 132 stat = mkl_sparse_destroy(aijmkl->csrA); 133 if (stat != SPARSE_STATUS_SUCCESS) { 134 PetscFunctionReturn(PETSC_ERR_LIB); 135 } 136 } 137 aijmkl->sparse_optimized = PETSC_FALSE; 138 139 /* Now perform the SpMV2 setup and matrix optimization. */ 140 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 141 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 142 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 143 m = A->rmap->n; 144 n = A->cmap->n; 145 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 146 aa = a->a; /* Nonzero elements stored row-by-row. */ 147 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 148 if (a->nz) { 149 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 150 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 151 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 152 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 153 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 154 stat = mkl_sparse_optimize(aijmkl->csrA); 155 if (stat != SPARSE_STATUS_SUCCESS) { 156 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 157 PetscFunctionReturn(PETSC_ERR_LIB); 158 } 159 aijmkl->sparse_optimized = PETSC_TRUE; 160 } 161 162 PetscFunctionReturn(0); 163 #endif 164 } 165 166 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 167 { 168 PetscErrorCode ierr; 169 Mat_SeqAIJMKL *aijmkl; 170 Mat_SeqAIJMKL *aijmkl_dest; 171 172 PetscFunctionBegin; 173 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 174 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 175 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 176 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 177 aijmkl_dest->sparse_optimized = PETSC_FALSE; 178 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 183 { 184 PetscErrorCode ierr; 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 186 187 PetscFunctionBegin; 188 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 189 190 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 191 * extra information and some different methods, call the AssemblyEnd 192 * routine for a MATSEQAIJ. 193 * I'm not sure if this is the best way to do this, but it avoids 194 * a lot of code duplication. 195 * I also note that currently MATSEQAIJMKL doesn't know anything about 196 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 197 * are many zero rows. If the SeqAIJ assembly end routine decides to use 198 * this, this may break things. (Don't know... haven't looked at it. 199 * Do I need to disable this somehow?) */ 200 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 201 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 202 203 /* Now create the MKL sparse handle (if needed; the function checks). */ 204 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 205 206 PetscFunctionReturn(0); 207 } 208 209 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 210 { 211 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 212 const PetscScalar *x; 213 PetscScalar *y; 214 const MatScalar *aa; 215 PetscErrorCode ierr; 216 PetscInt m=A->rmap->n; 217 PetscInt n=A->cmap->n; 218 PetscScalar alpha = 1.0; 219 PetscScalar beta = 0.0; 220 const PetscInt *aj,*ai; 221 char matdescra[6]; 222 223 224 /* Variables not in MatMult_SeqAIJ. */ 225 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 226 227 PetscFunctionBegin; 228 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 229 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 230 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 231 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 233 aa = a->a; /* Nonzero elements stored row-by-row. */ 234 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 235 236 /* Call MKL sparse BLAS routine to do the MatMult. */ 237 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 238 239 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 240 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 } 244 245 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 246 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 247 { 248 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 249 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 250 const PetscScalar *x; 251 PetscScalar *y; 252 PetscErrorCode ierr; 253 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 254 255 PetscFunctionBegin; 256 257 /* If there are no nonzero entries, this is a no-op: return immediately. */ 258 if(!a->nz) PetscFunctionReturn(0); 259 260 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 261 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 262 263 /* Call MKL SpMV2 executor routine to do the MatMult. */ 264 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 265 266 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 268 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 269 if (stat != SPARSE_STATUS_SUCCESS) { 270 PetscFunctionReturn(PETSC_ERR_LIB); 271 } 272 PetscFunctionReturn(0); 273 } 274 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 275 276 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 277 { 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 279 const PetscScalar *x; 280 PetscScalar *y; 281 const MatScalar *aa; 282 PetscErrorCode ierr; 283 PetscInt m=A->rmap->n; 284 PetscInt n=A->cmap->n; 285 PetscScalar alpha = 1.0; 286 PetscScalar beta = 0.0; 287 const PetscInt *aj,*ai; 288 char matdescra[6]; 289 290 /* Variables not in MatMultTranspose_SeqAIJ. */ 291 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 292 293 PetscFunctionBegin; 294 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 295 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 296 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 297 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 298 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 299 aa = a->a; /* Nonzero elements stored row-by-row. */ 300 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 301 302 /* Call MKL sparse BLAS routine to do the MatMult. */ 303 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 304 305 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 306 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 307 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 312 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 313 { 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 315 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 316 const PetscScalar *x; 317 PetscScalar *y; 318 PetscErrorCode ierr; 319 sparse_status_t stat; 320 321 PetscFunctionBegin; 322 323 /* If there are no nonzero entries, this is a no-op: return immediately. */ 324 if(!a->nz) PetscFunctionReturn(0); 325 326 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 327 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328 329 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 330 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 331 332 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 333 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 334 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 335 if (stat != SPARSE_STATUS_SUCCESS) { 336 PetscFunctionReturn(PETSC_ERR_LIB); 337 } 338 PetscFunctionReturn(0); 339 } 340 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 341 342 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 343 { 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 345 const PetscScalar *x; 346 PetscScalar *y,*z; 347 const MatScalar *aa; 348 PetscErrorCode ierr; 349 PetscInt m=A->rmap->n; 350 PetscInt n=A->cmap->n; 351 const PetscInt *aj,*ai; 352 PetscInt i; 353 354 /* Variables not in MatMultAdd_SeqAIJ. */ 355 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 356 PetscScalar alpha = 1.0; 357 PetscScalar beta; 358 char matdescra[6]; 359 360 PetscFunctionBegin; 361 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 362 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 363 364 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 365 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 366 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 367 aa = a->a; /* Nonzero elements stored row-by-row. */ 368 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 369 370 /* Call MKL sparse BLAS routine to do the MatMult. */ 371 if (zz == yy) { 372 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 373 beta = 1.0; 374 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 375 } else { 376 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 377 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 378 beta = 0.0; 379 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 380 for (i=0; i<m; i++) { 381 z[i] += y[i]; 382 } 383 } 384 385 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 386 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 387 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 392 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 393 { 394 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 395 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 396 const PetscScalar *x; 397 PetscScalar *y,*z; 398 PetscErrorCode ierr; 399 PetscInt m=A->rmap->n; 400 PetscInt i; 401 402 /* Variables not in MatMultAdd_SeqAIJ. */ 403 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 404 405 PetscFunctionBegin; 406 407 /* If there are no nonzero entries, this is a no-op: return immediately. */ 408 if(!a->nz) PetscFunctionReturn(0); 409 410 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 412 413 /* Call MKL sparse BLAS routine to do the MatMult. */ 414 if (zz == yy) { 415 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 416 * with alpha and beta both set to 1.0. */ 417 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 418 } else { 419 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 420 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 421 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 422 for (i=0; i<m; i++) { 423 z[i] += y[i]; 424 } 425 } 426 427 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 428 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 430 if (stat != SPARSE_STATUS_SUCCESS) { 431 PetscFunctionReturn(PETSC_ERR_LIB); 432 } 433 PetscFunctionReturn(0); 434 } 435 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 436 437 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 438 { 439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 440 const PetscScalar *x; 441 PetscScalar *y,*z; 442 const MatScalar *aa; 443 PetscErrorCode ierr; 444 PetscInt m=A->rmap->n; 445 PetscInt n=A->cmap->n; 446 const PetscInt *aj,*ai; 447 PetscInt i; 448 449 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 450 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 451 PetscScalar alpha = 1.0; 452 PetscScalar beta; 453 char matdescra[6]; 454 455 PetscFunctionBegin; 456 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 457 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 458 459 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 460 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 461 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 462 aa = a->a; /* Nonzero elements stored row-by-row. */ 463 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 464 465 /* Call MKL sparse BLAS routine to do the MatMult. */ 466 if (zz == yy) { 467 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 468 beta = 1.0; 469 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 470 } else { 471 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 472 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 473 beta = 0.0; 474 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 475 for (i=0; i<n; i++) { 476 z[i] += y[i]; 477 } 478 } 479 480 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 482 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 487 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 488 { 489 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 490 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 491 const PetscScalar *x; 492 PetscScalar *y,*z; 493 PetscErrorCode ierr; 494 PetscInt n=A->cmap->n; 495 PetscInt i; 496 497 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 498 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 499 500 PetscFunctionBegin; 501 502 /* If there are no nonzero entries, this is a no-op: return immediately. */ 503 if(!a->nz) PetscFunctionReturn(0); 504 505 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 506 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 507 508 /* Call MKL sparse BLAS routine to do the MatMult. */ 509 if (zz == yy) { 510 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 511 * with alpha and beta both set to 1.0. */ 512 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 513 } else { 514 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 515 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 516 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 517 for (i=0; i<n; i++) { 518 z[i] += y[i]; 519 } 520 } 521 522 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 523 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 524 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 525 if (stat != SPARSE_STATUS_SUCCESS) { 526 PetscFunctionReturn(PETSC_ERR_LIB); 527 } 528 PetscFunctionReturn(0); 529 } 530 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 531 532 PETSC_INTERN PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 533 { 534 PetscErrorCode ierr; 535 536 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 537 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 542 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 543 * routine, but can also be used to convert an assembled SeqAIJ matrix 544 * into a SeqAIJMKL one. */ 545 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 546 { 547 PetscErrorCode ierr; 548 Mat B = *newmat; 549 Mat_SeqAIJMKL *aijmkl; 550 PetscBool set; 551 PetscBool sametype; 552 553 PetscFunctionBegin; 554 if (reuse == MAT_INITIAL_MATRIX) { 555 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 556 } 557 558 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 559 if (sametype) PetscFunctionReturn(0); 560 561 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 562 B->spptr = (void*) aijmkl; 563 564 /* Set function pointers for methods that we inherit from AIJ but override. 565 * We also parse some command line options below, since those determine some of the methods we point to. */ 566 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 567 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 568 B->ops->destroy = MatDestroy_SeqAIJMKL; 569 570 aijmkl->sparse_optimized = PETSC_FALSE; 571 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 572 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 573 #elif 574 aijmkl->no_SpMV2 = PETSC_TRUE; 575 #endif 576 577 /* Parse command line options. */ 578 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 579 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 580 ierr = PetscOptionsEnd();CHKERRQ(ierr); 581 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 582 if(!aijmkl->no_SpMV2) { 583 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 584 aijmkl->no_SpMV2 = PETSC_TRUE; 585 } 586 #endif 587 588 if(!aijmkl->no_SpMV2) { 589 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 590 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 591 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 592 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 593 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 594 #endif 595 } else { 596 B->ops->mult = MatMult_SeqAIJMKL; 597 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 598 B->ops->multadd = MatMultAdd_SeqAIJMKL; 599 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 600 } 601 602 B->ops->scale = MatScale_SeqAIJMKL; 603 604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 609 610 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 611 *newmat = B; 612 PetscFunctionReturn(0); 613 } 614 615 /*@C 616 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 617 This type inherits from AIJ and is largely identical, but uses sparse BLAS 618 routines from Intel MKL whenever possible. 619 Collective on MPI_Comm 620 621 Input Parameters: 622 + comm - MPI communicator, set to PETSC_COMM_SELF 623 . m - number of rows 624 . n - number of columns 625 . nz - number of nonzeros per row (same for all rows) 626 - nnz - array containing the number of nonzeros in the various rows 627 (possibly different for each row) or NULL 628 629 Output Parameter: 630 . A - the matrix 631 632 Notes: 633 If nnz is given then nz is ignored 634 635 Level: intermediate 636 637 .keywords: matrix, cray, sparse, parallel 638 639 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 640 @*/ 641 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 642 { 643 PetscErrorCode ierr; 644 645 PetscFunctionBegin; 646 ierr = MatCreate(comm,A);CHKERRQ(ierr); 647 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 648 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 649 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 654 { 655 PetscErrorCode ierr; 656 657 PetscFunctionBegin; 658 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 659 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662