1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->scale = MatScale_SeqAIJ; 52 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 53 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 54 B->ops->axpy = MatAXPY_SeqAIJ; 55 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 if(!aijmkl->no_SpMV2) { 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif 64 } 65 66 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67 * simply involves destroying the MKL sparse matrix handle and then freeing 68 * the spptr pointer. */ 69 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 70 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 71 72 if (aijmkl->sparse_optimized) { 73 sparse_status_t stat; 74 stat = mkl_sparse_destroy(aijmkl->csrA); 75 if (stat != SPARSE_STATUS_SUCCESS) { 76 PetscFunctionReturn(PETSC_ERR_LIB); 77 } 78 } 79 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 80 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 81 82 /* Change the type of B to MATSEQAIJ. */ 83 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 84 85 *newmat = B; 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 90 { 91 PetscErrorCode ierr; 92 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 93 94 PetscFunctionBegin; 95 96 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 97 * spptr pointer. */ 98 if (aijmkl) { 99 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 100 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 101 if (aijmkl->sparse_optimized) { 102 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 103 stat = mkl_sparse_destroy(aijmkl->csrA); 104 if (stat != SPARSE_STATUS_SUCCESS) { 105 PetscFunctionReturn(PETSC_ERR_LIB); 106 } 107 } 108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 109 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 110 } 111 112 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 113 * to destroy everything that remains. */ 114 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 115 /* Note that I don't call MatSetType(). I believe this is because that 116 * is only to be called when *building* a matrix. I could be wrong, but 117 * that is how things work for the SuperLU matrix class. */ 118 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 123 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 124 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 125 * handle, creates a new one, and then calls mkl_sparse_optimize(). 126 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 127 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 128 * an unoptimized matrix handle here. */ 129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 130 { 131 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 132 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 133 * does nothing. We make it callable anyway in this case because it cuts 134 * down on littering the code with #ifdefs. */ 135 PetscFunctionBegin; 136 PetscFunctionReturn(0); 137 #else 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 139 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 140 PetscInt m,n; 141 MatScalar *aa; 142 PetscInt *aj,*ai; 143 sparse_status_t stat; 144 145 PetscFunctionBegin; 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 148 if (aijmkl->sparse_optimized) { 149 /* Matrix has been previously assembled and optimized. Must destroy old 150 * matrix handle before running the optimization step again. */ 151 stat = mkl_sparse_destroy(aijmkl->csrA); 152 if (stat != SPARSE_STATUS_SUCCESS) { 153 PetscFunctionReturn(PETSC_ERR_LIB); 154 } 155 } 156 aijmkl->sparse_optimized = PETSC_FALSE; 157 158 /* Now perform the SpMV2 setup and matrix optimization. */ 159 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 162 m = A->rmap->n; 163 n = A->cmap->n; 164 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165 aa = a->a; /* Nonzero elements stored row-by-row. */ 166 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 167 if ((a->nz!=0) & !(A->structure_only)) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 172 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 173 stat = mkl_sparse_optimize(aijmkl->csrA); 174 if (stat != SPARSE_STATUS_SUCCESS) { 175 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 176 PetscFunctionReturn(PETSC_ERR_LIB); 177 } 178 aijmkl->sparse_optimized = PETSC_TRUE; 179 } 180 181 PetscFunctionReturn(0); 182 #endif 183 } 184 185 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 186 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 187 * matrix handle. 188 * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity 189 * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult() 190 * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case. 191 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 192 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 193 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 194 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 195 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 196 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 197 { 198 PetscErrorCode ierr; 199 sparse_status_t stat; 200 sparse_index_base_t indexing; 201 PetscInt nrows, ncols; 202 PetscInt *aj,*ai,*dummy; 203 MatScalar *aa; 204 Mat A; 205 Mat_SeqAIJ *a; 206 Mat_SeqAIJMKL *aijmkl; 207 208 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 209 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 210 if (stat != SPARSE_STATUS_SUCCESS) { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 212 PetscFunctionReturn(PETSC_ERR_LIB); 213 } 214 215 if (reuse == MAT_INITIAL_MATRIX) { 216 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 217 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 218 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 219 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 220 221 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 222 * Now turn it into a MATSEQAIJMKL. */ 223 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 224 } else { 225 A = *mat; 226 } 227 228 a = (Mat_SeqAIJ*)A->data; 229 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 230 231 if (reuse == MAT_REUSE_MATRIX) { 232 /* Need to destroy old MKL handle. */ 233 stat = mkl_sparse_destroy(aijmkl->csrA); 234 if (stat != SPARSE_STATUS_SUCCESS) { 235 PetscFunctionReturn(PETSC_ERR_LIB); 236 } 237 238 /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */ 239 ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr); 240 } 241 aijmkl->csrA = csrA; 242 243 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 244 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 245 * and just need to be able to run the MKL optimization step. */ 246 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 247 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 248 stat = mkl_sparse_optimize(aijmkl->csrA); 249 if (stat != SPARSE_STATUS_SUCCESS) { 250 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 251 PetscFunctionReturn(PETSC_ERR_LIB); 252 } 253 aijmkl->sparse_optimized = PETSC_TRUE; 254 255 *mat = A; 256 PetscFunctionReturn(0); 257 } 258 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 259 260 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 261 { 262 PetscErrorCode ierr; 263 Mat_SeqAIJMKL *aijmkl; 264 Mat_SeqAIJMKL *aijmkl_dest; 265 266 PetscFunctionBegin; 267 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 268 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 269 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 270 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 271 aijmkl_dest->sparse_optimized = PETSC_FALSE; 272 if (aijmkl->eager_inspection) { 273 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 274 } 275 PetscFunctionReturn(0); 276 } 277 278 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 279 { 280 PetscErrorCode ierr; 281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 282 Mat_SeqAIJMKL *aijmkl; 283 284 PetscFunctionBegin; 285 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 286 287 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 288 * extra information and some different methods, call the AssemblyEnd 289 * routine for a MATSEQAIJ. 290 * I'm not sure if this is the best way to do this, but it avoids 291 * a lot of code duplication. */ 292 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 293 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 294 295 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 296 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 297 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 298 if (aijmkl->eager_inspection) { 299 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 300 } 301 302 PetscFunctionReturn(0); 303 } 304 305 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 306 { 307 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 308 const PetscScalar *x; 309 PetscScalar *y; 310 const MatScalar *aa; 311 PetscErrorCode ierr; 312 PetscInt m=A->rmap->n; 313 PetscInt n=A->cmap->n; 314 PetscScalar alpha = 1.0; 315 PetscScalar beta = 0.0; 316 const PetscInt *aj,*ai; 317 char matdescra[6]; 318 319 320 /* Variables not in MatMult_SeqAIJ. */ 321 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 322 323 PetscFunctionBegin; 324 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 325 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 326 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 327 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 329 aa = a->a; /* Nonzero elements stored row-by-row. */ 330 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 331 332 /* Call MKL sparse BLAS routine to do the MatMult. */ 333 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 334 335 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 337 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 341 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 342 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 345 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 346 const PetscScalar *x; 347 PetscScalar *y; 348 PetscErrorCode ierr; 349 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 350 351 PetscFunctionBegin; 352 353 /* If there are no nonzero entries, zero yy and return immediately. */ 354 if(!a->nz) { 355 PetscInt i; 356 PetscInt m=A->rmap->n; 357 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358 for (i=0; i<m; i++) { 359 y[i] = 0.0; 360 } 361 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 365 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 366 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 367 368 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 369 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 370 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 371 if (!aijmkl->sparse_optimized) { 372 MatSeqAIJMKL_create_mkl_handle(A); 373 } 374 375 /* Call MKL SpMV2 executor routine to do the MatMult. */ 376 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 377 378 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 379 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 380 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 381 if (stat != SPARSE_STATUS_SUCCESS) { 382 PetscFunctionReturn(PETSC_ERR_LIB); 383 } 384 PetscFunctionReturn(0); 385 } 386 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 387 388 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 389 { 390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 391 const PetscScalar *x; 392 PetscScalar *y; 393 const MatScalar *aa; 394 PetscErrorCode ierr; 395 PetscInt m=A->rmap->n; 396 PetscInt n=A->cmap->n; 397 PetscScalar alpha = 1.0; 398 PetscScalar beta = 0.0; 399 const PetscInt *aj,*ai; 400 char matdescra[6]; 401 402 /* Variables not in MatMultTranspose_SeqAIJ. */ 403 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 404 405 PetscFunctionBegin; 406 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 407 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 408 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 409 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 410 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 411 aa = a->a; /* Nonzero elements stored row-by-row. */ 412 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 413 414 /* Call MKL sparse BLAS routine to do the MatMult. */ 415 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 416 417 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 419 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 424 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 425 { 426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 427 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 428 const PetscScalar *x; 429 PetscScalar *y; 430 PetscErrorCode ierr; 431 sparse_status_t stat; 432 433 PetscFunctionBegin; 434 435 /* If there are no nonzero entries, zero yy and return immediately. */ 436 if(!a->nz) { 437 PetscInt i; 438 PetscInt n=A->cmap->n; 439 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 440 for (i=0; i<n; i++) { 441 y[i] = 0.0; 442 } 443 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 448 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 449 450 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 451 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 452 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 453 if (!aijmkl->sparse_optimized) { 454 MatSeqAIJMKL_create_mkl_handle(A); 455 } 456 457 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 458 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 459 460 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 461 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 462 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 463 if (stat != SPARSE_STATUS_SUCCESS) { 464 PetscFunctionReturn(PETSC_ERR_LIB); 465 } 466 PetscFunctionReturn(0); 467 } 468 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 469 470 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 471 { 472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 473 const PetscScalar *x; 474 PetscScalar *y,*z; 475 const MatScalar *aa; 476 PetscErrorCode ierr; 477 PetscInt m=A->rmap->n; 478 PetscInt n=A->cmap->n; 479 const PetscInt *aj,*ai; 480 PetscInt i; 481 482 /* Variables not in MatMultAdd_SeqAIJ. */ 483 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 484 PetscScalar alpha = 1.0; 485 PetscScalar beta; 486 char matdescra[6]; 487 488 PetscFunctionBegin; 489 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 490 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 491 492 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 493 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 494 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 495 aa = a->a; /* Nonzero elements stored row-by-row. */ 496 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 497 498 /* Call MKL sparse BLAS routine to do the MatMult. */ 499 if (zz == yy) { 500 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 501 beta = 1.0; 502 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 503 } else { 504 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 505 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 506 beta = 0.0; 507 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 508 for (i=0; i<m; i++) { 509 z[i] += y[i]; 510 } 511 } 512 513 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 514 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 515 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 520 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 521 { 522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 523 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 524 const PetscScalar *x; 525 PetscScalar *y,*z; 526 PetscErrorCode ierr; 527 PetscInt m=A->rmap->n; 528 PetscInt i; 529 530 /* Variables not in MatMultAdd_SeqAIJ. */ 531 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 532 533 PetscFunctionBegin; 534 535 /* If there are no nonzero entries, set zz = yy and return immediately. */ 536 if(!a->nz) { 537 PetscInt i; 538 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 539 for (i=0; i<m; i++) { 540 z[i] = y[i]; 541 } 542 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 547 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 548 549 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 550 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 551 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 552 if (!aijmkl->sparse_optimized) { 553 MatSeqAIJMKL_create_mkl_handle(A); 554 } 555 556 /* Call MKL sparse BLAS routine to do the MatMult. */ 557 if (zz == yy) { 558 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 559 * with alpha and beta both set to 1.0. */ 560 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 561 } else { 562 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 563 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 564 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 565 for (i=0; i<m; i++) { 566 z[i] += y[i]; 567 } 568 } 569 570 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 571 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 572 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 573 if (stat != SPARSE_STATUS_SUCCESS) { 574 PetscFunctionReturn(PETSC_ERR_LIB); 575 } 576 PetscFunctionReturn(0); 577 } 578 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 579 580 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 581 { 582 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 583 const PetscScalar *x; 584 PetscScalar *y,*z; 585 const MatScalar *aa; 586 PetscErrorCode ierr; 587 PetscInt m=A->rmap->n; 588 PetscInt n=A->cmap->n; 589 const PetscInt *aj,*ai; 590 PetscInt i; 591 592 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 593 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 594 PetscScalar alpha = 1.0; 595 PetscScalar beta; 596 char matdescra[6]; 597 598 PetscFunctionBegin; 599 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 600 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 601 602 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 603 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 604 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 605 aa = a->a; /* Nonzero elements stored row-by-row. */ 606 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 607 608 /* Call MKL sparse BLAS routine to do the MatMult. */ 609 if (zz == yy) { 610 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 611 beta = 1.0; 612 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 613 } else { 614 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 615 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 616 beta = 0.0; 617 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 618 for (i=0; i<n; i++) { 619 z[i] += y[i]; 620 } 621 } 622 623 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 624 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 625 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 630 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 631 { 632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 633 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 634 const PetscScalar *x; 635 PetscScalar *y,*z; 636 PetscErrorCode ierr; 637 PetscInt n=A->cmap->n; 638 PetscInt i; 639 640 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 641 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 642 643 PetscFunctionBegin; 644 645 /* If there are no nonzero entries, set zz = yy and return immediately. */ 646 if(!a->nz) { 647 PetscInt i; 648 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 649 for (i=0; i<n; i++) { 650 z[i] = y[i]; 651 } 652 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 657 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 658 659 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 660 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 661 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 662 if (!aijmkl->sparse_optimized) { 663 MatSeqAIJMKL_create_mkl_handle(A); 664 } 665 666 /* Call MKL sparse BLAS routine to do the MatMult. */ 667 if (zz == yy) { 668 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 669 * with alpha and beta both set to 1.0. */ 670 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 671 } else { 672 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 673 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 674 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 675 for (i=0; i<n; i++) { 676 z[i] += y[i]; 677 } 678 } 679 680 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 681 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 682 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 683 if (stat != SPARSE_STATUS_SUCCESS) { 684 PetscFunctionReturn(PETSC_ERR_LIB); 685 } 686 PetscFunctionReturn(0); 687 } 688 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 689 690 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 691 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 692 { 693 Mat_SeqAIJMKL *a, *b; 694 sparse_matrix_t csrA, csrB, csrC; 695 PetscErrorCode ierr; 696 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 697 698 PetscFunctionBegin; 699 a = (Mat_SeqAIJMKL*)A->spptr; 700 b = (Mat_SeqAIJMKL*)B->spptr; 701 if (!a->sparse_optimized) { 702 MatSeqAIJMKL_create_mkl_handle(A); 703 } 704 if (!b->sparse_optimized) { 705 MatSeqAIJMKL_create_mkl_handle(B); 706 } 707 csrA = a->csrA; 708 csrB = b->csrA; 709 710 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 711 if (stat != SPARSE_STATUS_SUCCESS) { 712 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 713 PetscFunctionReturn(PETSC_ERR_LIB); 714 } 715 716 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 717 718 PetscFunctionReturn(0); 719 } 720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 721 722 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 723 { 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 728 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 733 { 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 738 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 743 { 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 748 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 753 { 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 758 if (str == SAME_NONZERO_PATTERN) { 759 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 760 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 761 } 762 PetscFunctionReturn(0); 763 } 764 765 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 766 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 767 * routine, but can also be used to convert an assembled SeqAIJ matrix 768 * into a SeqAIJMKL one. */ 769 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 770 { 771 PetscErrorCode ierr; 772 Mat B = *newmat; 773 Mat_SeqAIJMKL *aijmkl; 774 PetscBool set; 775 PetscBool sametype; 776 777 PetscFunctionBegin; 778 if (reuse == MAT_INITIAL_MATRIX) { 779 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 780 } 781 782 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 783 if (sametype) PetscFunctionReturn(0); 784 785 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 786 B->spptr = (void*) aijmkl; 787 788 /* Set function pointers for methods that we inherit from AIJ but override. 789 * We also parse some command line options below, since those determine some of the methods we point to. */ 790 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 791 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 792 B->ops->destroy = MatDestroy_SeqAIJMKL; 793 794 aijmkl->sparse_optimized = PETSC_FALSE; 795 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 796 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 797 #else 798 aijmkl->no_SpMV2 = PETSC_TRUE; 799 #endif 800 aijmkl->eager_inspection = PETSC_FALSE; 801 802 /* Parse command line options. */ 803 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 804 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 805 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 806 ierr = PetscOptionsEnd();CHKERRQ(ierr); 807 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 808 if(!aijmkl->no_SpMV2) { 809 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 810 aijmkl->no_SpMV2 = PETSC_TRUE; 811 } 812 #endif 813 814 if(!aijmkl->no_SpMV2) { 815 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 816 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 817 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 818 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 819 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 820 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 821 #endif 822 } else { 823 B->ops->mult = MatMult_SeqAIJMKL; 824 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 825 B->ops->multadd = MatMultAdd_SeqAIJMKL; 826 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 827 } 828 829 B->ops->scale = MatScale_SeqAIJMKL; 830 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 831 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 832 B->ops->axpy = MatAXPY_SeqAIJMKL; 833 834 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 835 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 836 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 837 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 838 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 839 if(!aijmkl->no_SpMV2) { 840 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 841 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 842 #endif 843 } 844 845 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 846 *newmat = B; 847 PetscFunctionReturn(0); 848 } 849 850 /*@C 851 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 852 This type inherits from AIJ and is largely identical, but uses sparse BLAS 853 routines from Intel MKL whenever possible. 854 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 855 operations are currently supported. 856 If the installed version of MKL supports the "SpMV2" sparse 857 inspector-executor routines, then those are used by default. 858 859 Collective on MPI_Comm 860 861 Input Parameters: 862 + comm - MPI communicator, set to PETSC_COMM_SELF 863 . m - number of rows 864 . n - number of columns 865 . nz - number of nonzeros per row (same for all rows) 866 - nnz - array containing the number of nonzeros in the various rows 867 (possibly different for each row) or NULL 868 869 Output Parameter: 870 . A - the matrix 871 872 Options Database Keys: 873 . -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines 874 875 Notes: 876 If nnz is given then nz is ignored 877 878 Level: intermediate 879 880 .keywords: matrix, MKL, sparse, parallel 881 882 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 883 @*/ 884 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 885 { 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 ierr = MatCreate(comm,A);CHKERRQ(ierr); 890 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 891 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 892 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 897 { 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 902 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905