1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 52 B->ops->scale = MatScale_SeqAIJ; 53 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 54 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 55 B->ops->axpy = MatAXPY_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 if(!aijmkl->no_SpMV2) { 62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #endif 66 } 67 68 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 69 * simply involves destroying the MKL sparse matrix handle and then freeing 70 * the spptr pointer. */ 71 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 72 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 73 74 if (aijmkl->sparse_optimized) { 75 sparse_status_t stat; 76 stat = mkl_sparse_destroy(aijmkl->csrA); 77 if (stat != SPARSE_STATUS_SUCCESS) { 78 PetscFunctionReturn(PETSC_ERR_LIB); 79 } 80 } 81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 82 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 83 84 /* Change the type of B to MATSEQAIJ. */ 85 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 86 87 *newmat = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 92 { 93 PetscErrorCode ierr; 94 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 95 96 PetscFunctionBegin; 97 98 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 99 * spptr pointer. */ 100 if (aijmkl) { 101 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 102 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 103 if (aijmkl->sparse_optimized) { 104 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 105 stat = mkl_sparse_destroy(aijmkl->csrA); 106 if (stat != SPARSE_STATUS_SUCCESS) { 107 PetscFunctionReturn(PETSC_ERR_LIB); 108 } 109 } 110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 111 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 112 } 113 114 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 115 * to destroy everything that remains. */ 116 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 117 /* Note that I don't call MatSetType(). I believe this is because that 118 * is only to be called when *building* a matrix. I could be wrong, but 119 * that is how things work for the SuperLU matrix class. */ 120 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 125 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 126 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 127 * handle, creates a new one, and then calls mkl_sparse_optimize(). 128 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 129 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 130 * an unoptimized matrix handle here. */ 131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 132 { 133 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 134 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 135 * does nothing. We make it callable anyway in this case because it cuts 136 * down on littering the code with #ifdefs. */ 137 PetscFunctionBegin; 138 PetscFunctionReturn(0); 139 #else 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 141 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 142 PetscInt m,n; 143 MatScalar *aa; 144 PetscInt *aj,*ai; 145 sparse_status_t stat; 146 147 PetscFunctionBegin; 148 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 149 150 if (aijmkl->sparse_optimized) { 151 /* Matrix has been previously assembled and optimized. Must destroy old 152 * matrix handle before running the optimization step again. */ 153 stat = mkl_sparse_destroy(aijmkl->csrA); 154 if (stat != SPARSE_STATUS_SUCCESS) { 155 PetscFunctionReturn(PETSC_ERR_LIB); 156 } 157 } 158 aijmkl->sparse_optimized = PETSC_FALSE; 159 160 /* Now perform the SpMV2 setup and matrix optimization. */ 161 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 162 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 163 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 164 m = A->rmap->n; 165 n = A->cmap->n; 166 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 167 aa = a->a; /* Nonzero elements stored row-by-row. */ 168 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 169 if ((a->nz!=0) & !(A->structure_only)) { 170 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 171 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 172 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 173 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 174 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175 stat = mkl_sparse_optimize(aijmkl->csrA); 176 if (stat != SPARSE_STATUS_SUCCESS) { 177 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 178 PetscFunctionReturn(PETSC_ERR_LIB); 179 } 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 } 182 183 PetscFunctionReturn(0); 184 #endif 185 } 186 187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 188 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 189 * matrix handle. 190 * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity 191 * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult() 192 * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case. 193 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 194 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 195 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 196 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 197 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 199 { 200 PetscErrorCode ierr; 201 sparse_status_t stat; 202 sparse_index_base_t indexing; 203 PetscInt nrows, ncols; 204 PetscInt *aj,*ai,*dummy; 205 MatScalar *aa; 206 Mat A; 207 Mat_SeqAIJ *a; 208 Mat_SeqAIJMKL *aijmkl; 209 210 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 211 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 212 if (stat != SPARSE_STATUS_SUCCESS) { 213 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 214 PetscFunctionReturn(PETSC_ERR_LIB); 215 } 216 217 if (reuse == MAT_INITIAL_MATRIX) { 218 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 219 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 220 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 221 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 222 223 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 224 * Now turn it into a MATSEQAIJMKL. */ 225 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 226 } else { 227 A = *mat; 228 } 229 230 a = (Mat_SeqAIJ*)A->data; 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 233 if (reuse == MAT_REUSE_MATRIX) { 234 /* Need to destroy old MKL handle. */ 235 stat = mkl_sparse_destroy(aijmkl->csrA); 236 if (stat != SPARSE_STATUS_SUCCESS) { 237 PetscFunctionReturn(PETSC_ERR_LIB); 238 } 239 240 /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */ 241 ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr); 242 } 243 aijmkl->csrA = csrA; 244 245 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 246 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 247 * and just need to be able to run the MKL optimization step. */ 248 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 249 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 250 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 251 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 252 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 253 stat = mkl_sparse_optimize(aijmkl->csrA); 254 if (stat != SPARSE_STATUS_SUCCESS) { 255 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 256 PetscFunctionReturn(PETSC_ERR_LIB); 257 } 258 aijmkl->sparse_optimized = PETSC_TRUE; 259 260 *mat = A; 261 PetscFunctionReturn(0); 262 } 263 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 264 265 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 266 { 267 PetscErrorCode ierr; 268 Mat_SeqAIJMKL *aijmkl; 269 Mat_SeqAIJMKL *aijmkl_dest; 270 271 PetscFunctionBegin; 272 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 273 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 274 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 275 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 276 aijmkl_dest->sparse_optimized = PETSC_FALSE; 277 if (aijmkl->eager_inspection) { 278 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 279 } 280 PetscFunctionReturn(0); 281 } 282 283 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 284 { 285 PetscErrorCode ierr; 286 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 287 Mat_SeqAIJMKL *aijmkl; 288 289 PetscFunctionBegin; 290 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 291 292 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 293 * extra information and some different methods, call the AssemblyEnd 294 * routine for a MATSEQAIJ. 295 * I'm not sure if this is the best way to do this, but it avoids 296 * a lot of code duplication. */ 297 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 298 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 299 300 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 301 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 302 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 303 if (aijmkl->eager_inspection) { 304 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 305 } 306 307 PetscFunctionReturn(0); 308 } 309 310 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 311 { 312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 313 const PetscScalar *x; 314 PetscScalar *y; 315 const MatScalar *aa; 316 PetscErrorCode ierr; 317 PetscInt m=A->rmap->n; 318 PetscInt n=A->cmap->n; 319 PetscScalar alpha = 1.0; 320 PetscScalar beta = 0.0; 321 const PetscInt *aj,*ai; 322 char matdescra[6]; 323 324 325 /* Variables not in MatMult_SeqAIJ. */ 326 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 327 328 PetscFunctionBegin; 329 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 330 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 331 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 332 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 333 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 334 aa = a->a; /* Nonzero elements stored row-by-row. */ 335 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 336 337 /* Call MKL sparse BLAS routine to do the MatMult. */ 338 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 339 340 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 341 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 342 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 347 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 348 { 349 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 350 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 351 const PetscScalar *x; 352 PetscScalar *y; 353 PetscErrorCode ierr; 354 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 355 356 PetscFunctionBegin; 357 358 /* If there are no nonzero entries, zero yy and return immediately. */ 359 if(!a->nz) { 360 PetscInt i; 361 PetscInt m=A->rmap->n; 362 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 363 for (i=0; i<m; i++) { 364 y[i] = 0.0; 365 } 366 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 371 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 372 373 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 374 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 375 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 376 if (!aijmkl->sparse_optimized) { 377 MatSeqAIJMKL_create_mkl_handle(A); 378 } 379 380 /* Call MKL SpMV2 executor routine to do the MatMult. */ 381 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 382 383 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 384 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 385 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386 if (stat != SPARSE_STATUS_SUCCESS) { 387 PetscFunctionReturn(PETSC_ERR_LIB); 388 } 389 PetscFunctionReturn(0); 390 } 391 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 392 393 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 394 { 395 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 396 const PetscScalar *x; 397 PetscScalar *y; 398 const MatScalar *aa; 399 PetscErrorCode ierr; 400 PetscInt m=A->rmap->n; 401 PetscInt n=A->cmap->n; 402 PetscScalar alpha = 1.0; 403 PetscScalar beta = 0.0; 404 const PetscInt *aj,*ai; 405 char matdescra[6]; 406 407 /* Variables not in MatMultTranspose_SeqAIJ. */ 408 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 409 410 PetscFunctionBegin; 411 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 412 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 413 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 414 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 415 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 416 aa = a->a; /* Nonzero elements stored row-by-row. */ 417 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 418 419 /* Call MKL sparse BLAS routine to do the MatMult. */ 420 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 421 422 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 424 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 429 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 430 { 431 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 432 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 433 const PetscScalar *x; 434 PetscScalar *y; 435 PetscErrorCode ierr; 436 sparse_status_t stat; 437 438 PetscFunctionBegin; 439 440 /* If there are no nonzero entries, zero yy and return immediately. */ 441 if(!a->nz) { 442 PetscInt i; 443 PetscInt n=A->cmap->n; 444 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 445 for (i=0; i<n; i++) { 446 y[i] = 0.0; 447 } 448 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 453 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 454 455 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 456 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 457 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 458 if (!aijmkl->sparse_optimized) { 459 MatSeqAIJMKL_create_mkl_handle(A); 460 } 461 462 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 463 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 464 465 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 466 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 467 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 468 if (stat != SPARSE_STATUS_SUCCESS) { 469 PetscFunctionReturn(PETSC_ERR_LIB); 470 } 471 PetscFunctionReturn(0); 472 } 473 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 474 475 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 476 { 477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 478 const PetscScalar *x; 479 PetscScalar *y,*z; 480 const MatScalar *aa; 481 PetscErrorCode ierr; 482 PetscInt m=A->rmap->n; 483 PetscInt n=A->cmap->n; 484 const PetscInt *aj,*ai; 485 PetscInt i; 486 487 /* Variables not in MatMultAdd_SeqAIJ. */ 488 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 489 PetscScalar alpha = 1.0; 490 PetscScalar beta; 491 char matdescra[6]; 492 493 PetscFunctionBegin; 494 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 495 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 496 497 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 498 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 499 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 500 aa = a->a; /* Nonzero elements stored row-by-row. */ 501 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 502 503 /* Call MKL sparse BLAS routine to do the MatMult. */ 504 if (zz == yy) { 505 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 506 beta = 1.0; 507 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 508 } else { 509 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 510 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 511 beta = 0.0; 512 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 513 for (i=0; i<m; i++) { 514 z[i] += y[i]; 515 } 516 } 517 518 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 519 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 520 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 525 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 526 { 527 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 528 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 529 const PetscScalar *x; 530 PetscScalar *y,*z; 531 PetscErrorCode ierr; 532 PetscInt m=A->rmap->n; 533 PetscInt i; 534 535 /* Variables not in MatMultAdd_SeqAIJ. */ 536 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 537 538 PetscFunctionBegin; 539 540 /* If there are no nonzero entries, set zz = yy and return immediately. */ 541 if(!a->nz) { 542 PetscInt i; 543 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 544 for (i=0; i<m; i++) { 545 z[i] = y[i]; 546 } 547 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 552 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 553 554 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 555 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 556 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 557 if (!aijmkl->sparse_optimized) { 558 MatSeqAIJMKL_create_mkl_handle(A); 559 } 560 561 /* Call MKL sparse BLAS routine to do the MatMult. */ 562 if (zz == yy) { 563 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 564 * with alpha and beta both set to 1.0. */ 565 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 566 } else { 567 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 568 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 569 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 570 for (i=0; i<m; i++) { 571 z[i] += y[i]; 572 } 573 } 574 575 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 577 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 578 if (stat != SPARSE_STATUS_SUCCESS) { 579 PetscFunctionReturn(PETSC_ERR_LIB); 580 } 581 PetscFunctionReturn(0); 582 } 583 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 584 585 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 588 const PetscScalar *x; 589 PetscScalar *y,*z; 590 const MatScalar *aa; 591 PetscErrorCode ierr; 592 PetscInt m=A->rmap->n; 593 PetscInt n=A->cmap->n; 594 const PetscInt *aj,*ai; 595 PetscInt i; 596 597 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 598 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 599 PetscScalar alpha = 1.0; 600 PetscScalar beta; 601 char matdescra[6]; 602 603 PetscFunctionBegin; 604 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 605 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 606 607 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 608 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 609 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 610 aa = a->a; /* Nonzero elements stored row-by-row. */ 611 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 612 613 /* Call MKL sparse BLAS routine to do the MatMult. */ 614 if (zz == yy) { 615 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 616 beta = 1.0; 617 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 618 } else { 619 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 620 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 621 beta = 0.0; 622 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 623 for (i=0; i<n; i++) { 624 z[i] += y[i]; 625 } 626 } 627 628 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 629 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 630 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 635 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 636 { 637 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 638 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 639 const PetscScalar *x; 640 PetscScalar *y,*z; 641 PetscErrorCode ierr; 642 PetscInt n=A->cmap->n; 643 PetscInt i; 644 645 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 646 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 647 648 PetscFunctionBegin; 649 650 /* If there are no nonzero entries, set zz = yy and return immediately. */ 651 if(!a->nz) { 652 PetscInt i; 653 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 654 for (i=0; i<n; i++) { 655 z[i] = y[i]; 656 } 657 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 662 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 663 664 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 665 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 666 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 667 if (!aijmkl->sparse_optimized) { 668 MatSeqAIJMKL_create_mkl_handle(A); 669 } 670 671 /* Call MKL sparse BLAS routine to do the MatMult. */ 672 if (zz == yy) { 673 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 674 * with alpha and beta both set to 1.0. */ 675 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 676 } else { 677 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 678 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 679 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 680 for (i=0; i<n; i++) { 681 z[i] += y[i]; 682 } 683 } 684 685 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 686 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 687 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 688 if (stat != SPARSE_STATUS_SUCCESS) { 689 PetscFunctionReturn(PETSC_ERR_LIB); 690 } 691 PetscFunctionReturn(0); 692 } 693 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 694 695 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 696 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 697 { 698 Mat_SeqAIJMKL *a, *b; 699 sparse_matrix_t csrA, csrB, csrC; 700 PetscErrorCode ierr; 701 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 702 703 PetscFunctionBegin; 704 a = (Mat_SeqAIJMKL*)A->spptr; 705 b = (Mat_SeqAIJMKL*)B->spptr; 706 if (!a->sparse_optimized) { 707 MatSeqAIJMKL_create_mkl_handle(A); 708 } 709 if (!b->sparse_optimized) { 710 MatSeqAIJMKL_create_mkl_handle(B); 711 } 712 csrA = a->csrA; 713 csrB = b->csrA; 714 715 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 716 if (stat != SPARSE_STATUS_SUCCESS) { 717 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 718 PetscFunctionReturn(PETSC_ERR_LIB); 719 } 720 721 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 722 723 PetscFunctionReturn(0); 724 } 725 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 726 727 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 728 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 729 { 730 Mat_SeqAIJMKL *a, *b; 731 sparse_matrix_t csrA, csrB, csrC; 732 PetscErrorCode ierr; 733 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 734 735 PetscFunctionBegin; 736 a = (Mat_SeqAIJMKL*)A->spptr; 737 b = (Mat_SeqAIJMKL*)B->spptr; 738 if (!a->sparse_optimized) { 739 MatSeqAIJMKL_create_mkl_handle(A); 740 } 741 if (!b->sparse_optimized) { 742 MatSeqAIJMKL_create_mkl_handle(B); 743 } 744 csrA = a->csrA; 745 csrB = b->csrA; 746 747 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 748 if (stat != SPARSE_STATUS_SUCCESS) { 749 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 750 PetscFunctionReturn(PETSC_ERR_LIB); 751 } 752 753 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 754 755 PetscFunctionReturn(0); 756 } 757 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 758 759 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 760 { 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 765 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 766 PetscFunctionReturn(0); 767 } 768 769 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 770 { 771 PetscErrorCode ierr; 772 773 PetscFunctionBegin; 774 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 775 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 780 { 781 PetscErrorCode ierr; 782 783 PetscFunctionBegin; 784 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 785 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 790 { 791 PetscErrorCode ierr; 792 793 PetscFunctionBegin; 794 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 795 if (str == SAME_NONZERO_PATTERN) { 796 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 797 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 803 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 804 * routine, but can also be used to convert an assembled SeqAIJ matrix 805 * into a SeqAIJMKL one. */ 806 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 807 { 808 PetscErrorCode ierr; 809 Mat B = *newmat; 810 Mat_SeqAIJMKL *aijmkl; 811 PetscBool set; 812 PetscBool sametype; 813 814 PetscFunctionBegin; 815 if (reuse == MAT_INITIAL_MATRIX) { 816 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 817 } 818 819 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 820 if (sametype) PetscFunctionReturn(0); 821 822 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 823 B->spptr = (void*) aijmkl; 824 825 /* Set function pointers for methods that we inherit from AIJ but override. 826 * We also parse some command line options below, since those determine some of the methods we point to. */ 827 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 828 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 829 B->ops->destroy = MatDestroy_SeqAIJMKL; 830 831 aijmkl->sparse_optimized = PETSC_FALSE; 832 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 833 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 834 #else 835 aijmkl->no_SpMV2 = PETSC_TRUE; 836 #endif 837 aijmkl->eager_inspection = PETSC_FALSE; 838 839 /* Parse command line options. */ 840 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 841 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 842 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 843 ierr = PetscOptionsEnd();CHKERRQ(ierr); 844 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 845 if(!aijmkl->no_SpMV2) { 846 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 847 aijmkl->no_SpMV2 = PETSC_TRUE; 848 } 849 #endif 850 851 if(!aijmkl->no_SpMV2) { 852 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 853 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 854 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 855 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 856 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 857 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 858 B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 859 #endif 860 } else { 861 B->ops->mult = MatMult_SeqAIJMKL; 862 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 863 B->ops->multadd = MatMultAdd_SeqAIJMKL; 864 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 865 } 866 867 B->ops->scale = MatScale_SeqAIJMKL; 868 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 869 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 870 B->ops->axpy = MatAXPY_SeqAIJMKL; 871 872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 873 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 874 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 875 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 877 if(!aijmkl->no_SpMV2) { 878 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 879 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 881 #endif 882 } 883 884 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 885 *newmat = B; 886 PetscFunctionReturn(0); 887 } 888 889 /*@C 890 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 891 This type inherits from AIJ and is largely identical, but uses sparse BLAS 892 routines from Intel MKL whenever possible. 893 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 894 operations are currently supported. 895 If the installed version of MKL supports the "SpMV2" sparse 896 inspector-executor routines, then those are used by default. 897 898 Collective on MPI_Comm 899 900 Input Parameters: 901 + comm - MPI communicator, set to PETSC_COMM_SELF 902 . m - number of rows 903 . n - number of columns 904 . nz - number of nonzeros per row (same for all rows) 905 - nnz - array containing the number of nonzeros in the various rows 906 (possibly different for each row) or NULL 907 908 Output Parameter: 909 . A - the matrix 910 911 Options Database Keys: 912 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 913 914 Notes: 915 If nnz is given then nz is ignored 916 917 Level: intermediate 918 919 .keywords: matrix, MKL, sparse, parallel 920 921 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 922 @*/ 923 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 924 { 925 PetscErrorCode ierr; 926 927 PetscFunctionBegin; 928 ierr = MatCreate(comm,A);CHKERRQ(ierr); 929 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 930 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 931 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 935 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 936 { 937 PetscErrorCode ierr; 938 939 PetscFunctionBegin; 940 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 941 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 942 PetscFunctionReturn(0); 943 } 944