1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 52 B->ops->scale = MatScale_SeqAIJ; 53 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 54 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 55 B->ops->axpy = MatAXPY_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if(!aijmkl->no_SpMV2) { 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 } 66 67 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 68 * simply involves destroying the MKL sparse matrix handle and then freeing 69 * the spptr pointer. */ 70 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 71 72 if (aijmkl->sparse_optimized) { 73 sparse_status_t stat; 74 stat = mkl_sparse_destroy(aijmkl->csrA); 75 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 76 } 77 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 78 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 79 80 /* Change the type of B to MATSEQAIJ. */ 81 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 82 83 *newmat = B; 84 PetscFunctionReturn(0); 85 } 86 87 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 88 { 89 PetscErrorCode ierr; 90 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 91 92 PetscFunctionBegin; 93 94 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 95 * spptr pointer. */ 96 if (aijmkl) { 97 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 98 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 99 if (aijmkl->sparse_optimized) { 100 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 101 stat = mkl_sparse_destroy(aijmkl->csrA); 102 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 103 } 104 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 105 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 106 } 107 108 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 109 * to destroy everything that remains. */ 110 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 111 /* Note that I don't call MatSetType(). I believe this is because that 112 * is only to be called when *building* a matrix. I could be wrong, but 113 * that is how things work for the SuperLU matrix class. */ 114 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 115 PetscFunctionReturn(0); 116 } 117 118 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 119 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 120 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 121 * handle, creates a new one, and then calls mkl_sparse_optimize(). 122 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 123 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 124 * an unoptimized matrix handle here. */ 125 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 126 { 127 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 128 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 129 * does nothing. We make it callable anyway in this case because it cuts 130 * down on littering the code with #ifdefs. */ 131 PetscFunctionBegin; 132 PetscFunctionReturn(0); 133 #else 134 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 135 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 136 PetscInt m,n; 137 MatScalar *aa; 138 PetscInt *aj,*ai; 139 sparse_status_t stat; 140 141 PetscFunctionBegin; 142 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 143 144 if (aijmkl->sparse_optimized) { 145 /* Matrix has been previously assembled and optimized. Must destroy old 146 * matrix handle before running the optimization step again. */ 147 stat = mkl_sparse_destroy(aijmkl->csrA); 148 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 149 } 150 aijmkl->sparse_optimized = PETSC_FALSE; 151 152 /* Now perform the SpMV2 setup and matrix optimization. */ 153 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 154 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 155 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 156 m = A->rmap->n; 157 n = A->cmap->n; 158 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 159 aa = a->a; /* Nonzero elements stored row-by-row. */ 160 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 161 if ((a->nz!=0) & !(A->structure_only)) { 162 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 163 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 164 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 165 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 166 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 167 stat = mkl_sparse_optimize(aijmkl->csrA); 168 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 169 aijmkl->sparse_optimized = PETSC_TRUE; 170 } 171 172 PetscFunctionReturn(0); 173 #endif 174 } 175 176 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 177 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 178 * matrix handle. 179 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 180 * there is no good alternative. */ 181 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 182 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 183 { 184 PetscErrorCode ierr; 185 sparse_status_t stat; 186 sparse_index_base_t indexing; 187 PetscInt nrows, ncols; 188 PetscInt *aj,*ai,*dummy; 189 MatScalar *aa; 190 Mat A; 191 Mat_SeqAIJMKL *aijmkl; 192 193 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 194 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 195 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 196 197 if (reuse == MAT_REUSE_MATRIX) { 198 ierr = MatDestroy(mat);CHKERRQ(ierr); 199 } 200 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 201 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 202 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 203 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 204 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 205 * they will be destroyed when the MKL handle is destroyed. 206 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 207 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 208 209 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 210 * Now turn it into a MATSEQAIJMKL. */ 211 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 212 213 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 214 aijmkl->csrA = csrA; 215 216 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 217 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 218 * and just need to be able to run the MKL optimization step. */ 219 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 220 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 221 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 222 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 223 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 224 stat = mkl_sparse_optimize(aijmkl->csrA); 225 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 226 aijmkl->sparse_optimized = PETSC_TRUE; 227 228 *mat = A; 229 PetscFunctionReturn(0); 230 } 231 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 232 233 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 234 { 235 PetscErrorCode ierr; 236 Mat_SeqAIJMKL *aijmkl; 237 Mat_SeqAIJMKL *aijmkl_dest; 238 239 PetscFunctionBegin; 240 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 241 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 242 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 243 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 244 aijmkl_dest->sparse_optimized = PETSC_FALSE; 245 if (aijmkl->eager_inspection) { 246 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 247 } 248 PetscFunctionReturn(0); 249 } 250 251 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 252 { 253 PetscErrorCode ierr; 254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 255 Mat_SeqAIJMKL *aijmkl; 256 257 PetscFunctionBegin; 258 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 259 260 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 261 * extra information and some different methods, call the AssemblyEnd 262 * routine for a MATSEQAIJ. 263 * I'm not sure if this is the best way to do this, but it avoids 264 * a lot of code duplication. */ 265 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 266 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 267 268 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 269 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 270 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 271 if (aijmkl->eager_inspection) { 272 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 273 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 274 } else if (aijmkl->sparse_optimized) { 275 /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 276 * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 277 * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 278 * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 279 sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 280 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 281 aijmkl->sparse_optimized = PETSC_FALSE; 282 #endif 283 } 284 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 289 { 290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 291 const PetscScalar *x; 292 PetscScalar *y; 293 const MatScalar *aa; 294 PetscErrorCode ierr; 295 PetscInt m=A->rmap->n; 296 PetscInt n=A->cmap->n; 297 PetscScalar alpha = 1.0; 298 PetscScalar beta = 0.0; 299 const PetscInt *aj,*ai; 300 char matdescra[6]; 301 302 303 /* Variables not in MatMult_SeqAIJ. */ 304 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 305 306 PetscFunctionBegin; 307 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 308 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 309 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 310 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 311 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 312 aa = a->a; /* Nonzero elements stored row-by-row. */ 313 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 314 315 /* Call MKL sparse BLAS routine to do the MatMult. */ 316 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 317 318 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 319 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 320 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 325 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 326 { 327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 328 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 329 const PetscScalar *x; 330 PetscScalar *y; 331 PetscErrorCode ierr; 332 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 333 334 PetscFunctionBegin; 335 336 /* If there are no nonzero entries, zero yy and return immediately. */ 337 if(!a->nz) { 338 PetscInt i; 339 PetscInt m=A->rmap->n; 340 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 341 for (i=0; i<m; i++) { 342 y[i] = 0.0; 343 } 344 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 349 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 350 351 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 352 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 353 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 354 if (!aijmkl->sparse_optimized) { 355 MatSeqAIJMKL_create_mkl_handle(A); 356 } 357 358 /* Call MKL SpMV2 executor routine to do the MatMult. */ 359 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 360 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 361 362 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 363 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 364 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 368 369 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 370 { 371 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 372 const PetscScalar *x; 373 PetscScalar *y; 374 const MatScalar *aa; 375 PetscErrorCode ierr; 376 PetscInt m=A->rmap->n; 377 PetscInt n=A->cmap->n; 378 PetscScalar alpha = 1.0; 379 PetscScalar beta = 0.0; 380 const PetscInt *aj,*ai; 381 char matdescra[6]; 382 383 /* Variables not in MatMultTranspose_SeqAIJ. */ 384 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 385 386 PetscFunctionBegin; 387 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 388 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 389 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 390 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 391 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 392 aa = a->a; /* Nonzero elements stored row-by-row. */ 393 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 394 395 /* Call MKL sparse BLAS routine to do the MatMult. */ 396 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 397 398 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 400 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 405 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 406 { 407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 408 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 409 const PetscScalar *x; 410 PetscScalar *y; 411 PetscErrorCode ierr; 412 sparse_status_t stat; 413 414 PetscFunctionBegin; 415 416 /* If there are no nonzero entries, zero yy and return immediately. */ 417 if(!a->nz) { 418 PetscInt i; 419 PetscInt n=A->cmap->n; 420 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 421 for (i=0; i<n; i++) { 422 y[i] = 0.0; 423 } 424 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 429 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 430 431 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 432 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 433 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 434 if (!aijmkl->sparse_optimized) { 435 MatSeqAIJMKL_create_mkl_handle(A); 436 } 437 438 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 439 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 440 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 441 442 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 443 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 448 449 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 450 { 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 452 const PetscScalar *x; 453 PetscScalar *y,*z; 454 const MatScalar *aa; 455 PetscErrorCode ierr; 456 PetscInt m=A->rmap->n; 457 PetscInt n=A->cmap->n; 458 const PetscInt *aj,*ai; 459 PetscInt i; 460 461 /* Variables not in MatMultAdd_SeqAIJ. */ 462 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 463 PetscScalar alpha = 1.0; 464 PetscScalar beta; 465 char matdescra[6]; 466 467 PetscFunctionBegin; 468 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 469 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 470 471 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 472 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 473 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 474 aa = a->a; /* Nonzero elements stored row-by-row. */ 475 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 476 477 /* Call MKL sparse BLAS routine to do the MatMult. */ 478 if (zz == yy) { 479 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 480 beta = 1.0; 481 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 482 } else { 483 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 484 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 485 beta = 0.0; 486 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 487 for (i=0; i<m; i++) { 488 z[i] += y[i]; 489 } 490 } 491 492 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 499 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 500 { 501 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 502 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 503 const PetscScalar *x; 504 PetscScalar *y,*z; 505 PetscErrorCode ierr; 506 PetscInt m=A->rmap->n; 507 PetscInt i; 508 509 /* Variables not in MatMultAdd_SeqAIJ. */ 510 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 511 512 PetscFunctionBegin; 513 514 /* If there are no nonzero entries, set zz = yy and return immediately. */ 515 if(!a->nz) { 516 PetscInt i; 517 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 518 for (i=0; i<m; i++) { 519 z[i] = y[i]; 520 } 521 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 526 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 527 528 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 529 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 530 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 531 if (!aijmkl->sparse_optimized) { 532 MatSeqAIJMKL_create_mkl_handle(A); 533 } 534 535 /* Call MKL sparse BLAS routine to do the MatMult. */ 536 if (zz == yy) { 537 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 538 * with alpha and beta both set to 1.0. */ 539 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 540 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 541 } else { 542 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 543 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 544 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 545 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 546 for (i=0; i<m; i++) { 547 z[i] += y[i]; 548 } 549 } 550 551 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 552 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 553 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 557 558 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 559 { 560 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 561 const PetscScalar *x; 562 PetscScalar *y,*z; 563 const MatScalar *aa; 564 PetscErrorCode ierr; 565 PetscInt m=A->rmap->n; 566 PetscInt n=A->cmap->n; 567 const PetscInt *aj,*ai; 568 PetscInt i; 569 570 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 571 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 572 PetscScalar alpha = 1.0; 573 PetscScalar beta; 574 char matdescra[6]; 575 576 PetscFunctionBegin; 577 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 578 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 579 580 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 581 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 582 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 583 aa = a->a; /* Nonzero elements stored row-by-row. */ 584 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 585 586 /* Call MKL sparse BLAS routine to do the MatMult. */ 587 if (zz == yy) { 588 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 589 beta = 1.0; 590 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 591 } else { 592 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 593 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 594 beta = 0.0; 595 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 596 for (i=0; i<n; i++) { 597 z[i] += y[i]; 598 } 599 } 600 601 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 603 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 607 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 608 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 609 { 610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 611 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 612 const PetscScalar *x; 613 PetscScalar *y,*z; 614 PetscErrorCode ierr; 615 PetscInt n=A->cmap->n; 616 PetscInt i; 617 618 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 619 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 620 621 PetscFunctionBegin; 622 623 /* If there are no nonzero entries, set zz = yy and return immediately. */ 624 if(!a->nz) { 625 PetscInt i; 626 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 627 for (i=0; i<n; i++) { 628 z[i] = y[i]; 629 } 630 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 635 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 636 637 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 638 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 639 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 640 if (!aijmkl->sparse_optimized) { 641 MatSeqAIJMKL_create_mkl_handle(A); 642 } 643 644 /* Call MKL sparse BLAS routine to do the MatMult. */ 645 if (zz == yy) { 646 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 647 * with alpha and beta both set to 1.0. */ 648 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 649 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 650 } else { 651 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 652 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 653 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 654 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 655 for (i=0; i<n; i++) { 656 z[i] += y[i]; 657 } 658 } 659 660 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 661 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 666 667 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 668 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 669 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 670 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 671 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 672 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 673 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 674 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 675 { 676 Mat_SeqAIJMKL *a, *b; 677 sparse_matrix_t csrA, csrB, csrC; 678 PetscErrorCode ierr; 679 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 680 681 PetscFunctionBegin; 682 a = (Mat_SeqAIJMKL*)A->spptr; 683 b = (Mat_SeqAIJMKL*)B->spptr; 684 if (!a->sparse_optimized) { 685 MatSeqAIJMKL_create_mkl_handle(A); 686 } 687 if (!b->sparse_optimized) { 688 MatSeqAIJMKL_create_mkl_handle(B); 689 } 690 csrA = a->csrA; 691 csrB = b->csrA; 692 693 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 694 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 695 696 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 697 698 PetscFunctionReturn(0); 699 } 700 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 701 702 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 703 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 704 { 705 Mat_SeqAIJMKL *a, *b; 706 sparse_matrix_t csrA, csrB, csrC; 707 PetscErrorCode ierr; 708 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 709 710 PetscFunctionBegin; 711 a = (Mat_SeqAIJMKL*)A->spptr; 712 b = (Mat_SeqAIJMKL*)B->spptr; 713 if (!a->sparse_optimized) { 714 MatSeqAIJMKL_create_mkl_handle(A); 715 } 716 if (!b->sparse_optimized) { 717 MatSeqAIJMKL_create_mkl_handle(B); 718 } 719 csrA = a->csrA; 720 csrB = b->csrA; 721 722 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 723 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 724 725 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 726 727 PetscFunctionReturn(0); 728 } 729 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 730 731 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 732 { 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 737 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 742 { 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 747 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 752 { 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 757 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 762 { 763 PetscErrorCode ierr; 764 765 PetscFunctionBegin; 766 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 767 if (str == SAME_NONZERO_PATTERN) { 768 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 769 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 770 } 771 PetscFunctionReturn(0); 772 } 773 774 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 775 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 776 * routine, but can also be used to convert an assembled SeqAIJ matrix 777 * into a SeqAIJMKL one. */ 778 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 779 { 780 PetscErrorCode ierr; 781 Mat B = *newmat; 782 Mat_SeqAIJMKL *aijmkl; 783 PetscBool set; 784 PetscBool sametype; 785 786 PetscFunctionBegin; 787 if (reuse == MAT_INITIAL_MATRIX) { 788 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 789 } 790 791 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 792 if (sametype) PetscFunctionReturn(0); 793 794 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 795 B->spptr = (void*) aijmkl; 796 797 /* Set function pointers for methods that we inherit from AIJ but override. 798 * We also parse some command line options below, since those determine some of the methods we point to. */ 799 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 800 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 801 B->ops->destroy = MatDestroy_SeqAIJMKL; 802 803 aijmkl->sparse_optimized = PETSC_FALSE; 804 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 805 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 806 #else 807 aijmkl->no_SpMV2 = PETSC_TRUE; 808 #endif 809 aijmkl->eager_inspection = PETSC_FALSE; 810 811 /* Parse command line options. */ 812 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 813 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 814 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 815 ierr = PetscOptionsEnd();CHKERRQ(ierr); 816 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 817 if(!aijmkl->no_SpMV2) { 818 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 819 aijmkl->no_SpMV2 = PETSC_TRUE; 820 } 821 #endif 822 823 if(!aijmkl->no_SpMV2) { 824 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 825 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 826 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 827 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 828 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 829 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 830 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 831 #endif 832 } else { 833 B->ops->mult = MatMult_SeqAIJMKL; 834 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 835 B->ops->multadd = MatMultAdd_SeqAIJMKL; 836 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 837 } 838 839 B->ops->scale = MatScale_SeqAIJMKL; 840 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 841 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 842 B->ops->axpy = MatAXPY_SeqAIJMKL; 843 844 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 845 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 846 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 847 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 848 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 849 if(!aijmkl->no_SpMV2) { 850 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 851 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 852 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 853 #endif 854 } 855 856 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 857 *newmat = B; 858 PetscFunctionReturn(0); 859 } 860 861 /*@C 862 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 863 This type inherits from AIJ and is largely identical, but uses sparse BLAS 864 routines from Intel MKL whenever possible. 865 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 866 operations are currently supported. 867 If the installed version of MKL supports the "SpMV2" sparse 868 inspector-executor routines, then those are used by default. 869 870 Collective on MPI_Comm 871 872 Input Parameters: 873 + comm - MPI communicator, set to PETSC_COMM_SELF 874 . m - number of rows 875 . n - number of columns 876 . nz - number of nonzeros per row (same for all rows) 877 - nnz - array containing the number of nonzeros in the various rows 878 (possibly different for each row) or NULL 879 880 Output Parameter: 881 . A - the matrix 882 883 Options Database Keys: 884 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 885 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 886 887 Notes: 888 If nnz is given then nz is ignored 889 890 Level: intermediate 891 892 .keywords: matrix, MKL, sparse, parallel 893 894 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 895 @*/ 896 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 897 { 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = MatCreate(comm,A);CHKERRQ(ierr); 902 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 903 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 904 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 909 { 910 PetscErrorCode ierr; 911 912 PetscFunctionBegin; 913 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 914 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917