1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 52 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 54 55 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 56 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57 * simply involves destroying the MKL sparse matrix handle and then freeing 58 * the spptr pointer. */ 59 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 60 61 if (aijmkl->sparse_optimized) { 62 sparse_status_t stat; 63 stat = mkl_sparse_destroy(aijmkl->csrA); 64 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 65 } 66 #endif 67 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 68 69 /* Change the type of B to MATSEQAIJ. */ 70 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 71 72 *newmat = B; 73 PetscFunctionReturn(0); 74 } 75 76 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 77 { 78 PetscErrorCode ierr; 79 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 80 81 PetscFunctionBegin; 82 83 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 84 * spptr pointer. */ 85 if (aijmkl) { 86 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 87 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 88 if (aijmkl->sparse_optimized) { 89 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 90 stat = mkl_sparse_destroy(aijmkl->csrA); 91 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 92 } 93 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 94 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 95 } 96 97 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 98 * to destroy everything that remains. */ 99 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 100 /* Note that I don't call MatSetType(). I believe this is because that 101 * is only to be called when *building* a matrix. I could be wrong, but 102 * that is how things work for the SuperLU matrix class. */ 103 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 108 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 109 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 110 * handle, creates a new one, and then calls mkl_sparse_optimize(). 111 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 112 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 113 * an unoptimized matrix handle here. */ 114 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 115 { 116 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 117 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 118 * does nothing. We make it callable anyway in this case because it cuts 119 * down on littering the code with #ifdefs. */ 120 PetscFunctionBegin; 121 PetscFunctionReturn(0); 122 #else 123 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 124 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 125 PetscInt m,n; 126 MatScalar *aa; 127 PetscInt *aj,*ai; 128 sparse_status_t stat; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 133 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 134 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 135 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 136 * mkl_sparse_optimize() later. */ 137 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 138 #endif 139 140 if (aijmkl->sparse_optimized) { 141 /* Matrix has been previously assembled and optimized. Must destroy old 142 * matrix handle before running the optimization step again. */ 143 stat = mkl_sparse_destroy(aijmkl->csrA); 144 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 145 } 146 aijmkl->sparse_optimized = PETSC_FALSE; 147 148 /* Now perform the SpMV2 setup and matrix optimization. */ 149 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 150 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 151 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 152 m = A->rmap->n; 153 n = A->cmap->n; 154 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 155 aa = a->a; /* Nonzero elements stored row-by-row. */ 156 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 157 if ((a->nz!=0) & !(A->structure_only)) { 158 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 159 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 160 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 161 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 162 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 163 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 164 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 165 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 166 if (!aijmkl->no_SpMV2) { 167 stat = mkl_sparse_optimize(aijmkl->csrA); 168 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 169 } 170 aijmkl->sparse_optimized = PETSC_TRUE; 171 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 172 } 173 174 PetscFunctionReturn(0); 175 #endif 176 } 177 178 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 179 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 180 * matrix handle. 181 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 182 * there is no good alternative. */ 183 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 184 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 185 { 186 PetscErrorCode ierr; 187 sparse_status_t stat; 188 sparse_index_base_t indexing; 189 PetscInt nrows, ncols; 190 PetscInt *aj,*ai,*dummy; 191 MatScalar *aa; 192 Mat A; 193 Mat_SeqAIJMKL *aijmkl; 194 195 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 196 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 197 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 198 199 if (reuse == MAT_REUSE_MATRIX) { 200 ierr = MatDestroy(mat);CHKERRQ(ierr); 201 } 202 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 203 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 204 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 205 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 206 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 207 * they will be destroyed when the MKL handle is destroyed. 208 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 209 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 210 211 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 212 * Now turn it into a MATSEQAIJMKL. */ 213 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 214 215 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 216 aijmkl->csrA = csrA; 217 218 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 219 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 220 * and just need to be able to run the MKL optimization step. */ 221 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 222 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 223 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 224 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 225 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 226 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 227 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 228 if (!aijmkl->no_SpMV2) { 229 stat = mkl_sparse_optimize(aijmkl->csrA); 230 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 231 } 232 aijmkl->sparse_optimized = PETSC_TRUE; 233 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 234 235 *mat = A; 236 PetscFunctionReturn(0); 237 } 238 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 239 240 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 241 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 242 * MatMatMultNumeric(). */ 243 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 244 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 245 { 246 PetscInt i; 247 PetscInt nrows,ncols; 248 PetscInt nz; 249 PetscInt *ai,*aj,*dummy; 250 PetscScalar *aa; 251 PetscErrorCode ierr; 252 Mat_SeqAIJMKL *aijmkl; 253 sparse_status_t stat; 254 sparse_index_base_t indexing; 255 256 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 257 258 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 259 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 260 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 261 262 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 263 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 264 for (i=0; i<nrows; i++) { 265 nz = ai[i+1] - ai[i]; 266 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 267 } 268 269 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 272 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 273 /* We mark our matrix as having a valid, optimized MKL handle. 274 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 275 aijmkl->sparse_optimized = PETSC_TRUE; 276 277 PetscFunctionReturn(0); 278 } 279 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 280 281 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 282 { 283 PetscErrorCode ierr; 284 Mat_SeqAIJMKL *aijmkl; 285 Mat_SeqAIJMKL *aijmkl_dest; 286 287 PetscFunctionBegin; 288 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 289 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 290 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 291 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 292 aijmkl_dest->sparse_optimized = PETSC_FALSE; 293 if (aijmkl->eager_inspection) { 294 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 295 } 296 PetscFunctionReturn(0); 297 } 298 299 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 300 { 301 PetscErrorCode ierr; 302 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 303 Mat_SeqAIJMKL *aijmkl; 304 305 PetscFunctionBegin; 306 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 307 308 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 309 * extra information and some different methods, call the AssemblyEnd 310 * routine for a MATSEQAIJ. 311 * I'm not sure if this is the best way to do this, but it avoids 312 * a lot of code duplication. */ 313 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 314 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 315 316 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 317 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 318 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 319 if (aijmkl->eager_inspection) { 320 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 321 } 322 323 PetscFunctionReturn(0); 324 } 325 326 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 327 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 328 { 329 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 330 const PetscScalar *x; 331 PetscScalar *y; 332 const MatScalar *aa; 333 PetscErrorCode ierr; 334 PetscInt m=A->rmap->n; 335 PetscInt n=A->cmap->n; 336 PetscScalar alpha = 1.0; 337 PetscScalar beta = 0.0; 338 const PetscInt *aj,*ai; 339 char matdescra[6]; 340 341 342 /* Variables not in MatMult_SeqAIJ. */ 343 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 344 345 PetscFunctionBegin; 346 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 347 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 348 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 349 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 350 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 351 aa = a->a; /* Nonzero elements stored row-by-row. */ 352 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 353 354 /* Call MKL sparse BLAS routine to do the MatMult. */ 355 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 356 357 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 359 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 #endif 363 364 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 365 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 366 { 367 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 368 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 369 const PetscScalar *x; 370 PetscScalar *y; 371 PetscErrorCode ierr; 372 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 373 PetscObjectState state; 374 375 PetscFunctionBegin; 376 377 /* If there are no nonzero entries, zero yy and return immediately. */ 378 if(!a->nz) { 379 PetscInt i; 380 PetscInt m=A->rmap->n; 381 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 382 for (i=0; i<m; i++) { 383 y[i] = 0.0; 384 } 385 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 390 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 391 392 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 393 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 394 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 395 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 396 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 397 MatSeqAIJMKL_create_mkl_handle(A); 398 } 399 400 /* Call MKL SpMV2 executor routine to do the MatMult. */ 401 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 402 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 403 404 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 406 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 410 411 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 412 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 413 { 414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 415 const PetscScalar *x; 416 PetscScalar *y; 417 const MatScalar *aa; 418 PetscErrorCode ierr; 419 PetscInt m=A->rmap->n; 420 PetscInt n=A->cmap->n; 421 PetscScalar alpha = 1.0; 422 PetscScalar beta = 0.0; 423 const PetscInt *aj,*ai; 424 char matdescra[6]; 425 426 /* Variables not in MatMultTranspose_SeqAIJ. */ 427 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 428 429 PetscFunctionBegin; 430 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 431 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 432 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 434 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 435 aa = a->a; /* Nonzero elements stored row-by-row. */ 436 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 437 438 /* Call MKL sparse BLAS routine to do the MatMult. */ 439 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 440 441 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 442 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 443 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 #endif 447 448 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 449 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 450 { 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 452 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 453 const PetscScalar *x; 454 PetscScalar *y; 455 PetscErrorCode ierr; 456 sparse_status_t stat; 457 PetscObjectState state; 458 459 PetscFunctionBegin; 460 461 /* If there are no nonzero entries, zero yy and return immediately. */ 462 if(!a->nz) { 463 PetscInt i; 464 PetscInt n=A->cmap->n; 465 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 466 for (i=0; i<n; i++) { 467 y[i] = 0.0; 468 } 469 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 473 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 475 476 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 477 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 478 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 479 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 480 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 481 MatSeqAIJMKL_create_mkl_handle(A); 482 } 483 484 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 485 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 486 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 487 488 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 490 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 494 495 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 496 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 497 { 498 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 499 const PetscScalar *x; 500 PetscScalar *y,*z; 501 const MatScalar *aa; 502 PetscErrorCode ierr; 503 PetscInt m=A->rmap->n; 504 PetscInt n=A->cmap->n; 505 const PetscInt *aj,*ai; 506 PetscInt i; 507 508 /* Variables not in MatMultAdd_SeqAIJ. */ 509 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 510 PetscScalar alpha = 1.0; 511 PetscScalar beta; 512 char matdescra[6]; 513 514 PetscFunctionBegin; 515 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 516 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 517 518 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 519 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 520 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 521 aa = a->a; /* Nonzero elements stored row-by-row. */ 522 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 523 524 /* Call MKL sparse BLAS routine to do the MatMult. */ 525 if (zz == yy) { 526 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 527 beta = 1.0; 528 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 529 } else { 530 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 531 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 532 beta = 0.0; 533 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 534 for (i=0; i<m; i++) { 535 z[i] += y[i]; 536 } 537 } 538 539 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 540 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 541 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 #endif 545 546 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 547 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 548 { 549 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 550 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 551 const PetscScalar *x; 552 PetscScalar *y,*z; 553 PetscErrorCode ierr; 554 PetscInt m=A->rmap->n; 555 PetscInt i; 556 557 /* Variables not in MatMultAdd_SeqAIJ. */ 558 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 559 PetscObjectState state; 560 561 PetscFunctionBegin; 562 563 /* If there are no nonzero entries, set zz = yy and return immediately. */ 564 if(!a->nz) { 565 PetscInt i; 566 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 567 for (i=0; i<m; i++) { 568 z[i] = y[i]; 569 } 570 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 575 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 576 577 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 578 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 579 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 580 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 581 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 582 MatSeqAIJMKL_create_mkl_handle(A); 583 } 584 585 /* Call MKL sparse BLAS routine to do the MatMult. */ 586 if (zz == yy) { 587 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 588 * with alpha and beta both set to 1.0. */ 589 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 590 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 591 } else { 592 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 593 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 594 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 595 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 596 for (i=0; i<m; i++) { 597 z[i] += y[i]; 598 } 599 } 600 601 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 603 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 607 608 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 609 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 610 { 611 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 612 const PetscScalar *x; 613 PetscScalar *y,*z; 614 const MatScalar *aa; 615 PetscErrorCode ierr; 616 PetscInt m=A->rmap->n; 617 PetscInt n=A->cmap->n; 618 const PetscInt *aj,*ai; 619 PetscInt i; 620 621 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 622 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 623 PetscScalar alpha = 1.0; 624 PetscScalar beta; 625 char matdescra[6]; 626 627 PetscFunctionBegin; 628 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 629 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 630 631 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 632 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 633 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 634 aa = a->a; /* Nonzero elements stored row-by-row. */ 635 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 636 637 /* Call MKL sparse BLAS routine to do the MatMult. */ 638 if (zz == yy) { 639 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 640 beta = 1.0; 641 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 642 } else { 643 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 644 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 645 beta = 0.0; 646 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 647 for (i=0; i<n; i++) { 648 z[i] += y[i]; 649 } 650 } 651 652 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 653 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 654 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 #endif 658 659 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 660 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 661 { 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 663 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 664 const PetscScalar *x; 665 PetscScalar *y,*z; 666 PetscErrorCode ierr; 667 PetscInt n=A->cmap->n; 668 PetscInt i; 669 PetscObjectState state; 670 671 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 672 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 673 674 PetscFunctionBegin; 675 676 /* If there are no nonzero entries, set zz = yy and return immediately. */ 677 if(!a->nz) { 678 PetscInt i; 679 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 680 for (i=0; i<n; i++) { 681 z[i] = y[i]; 682 } 683 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 689 690 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 691 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 692 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 693 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 694 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 695 MatSeqAIJMKL_create_mkl_handle(A); 696 } 697 698 /* Call MKL sparse BLAS routine to do the MatMult. */ 699 if (zz == yy) { 700 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 701 * with alpha and beta both set to 1.0. */ 702 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 703 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 704 } else { 705 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 706 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 707 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 708 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 709 for (i=0; i<n; i++) { 710 z[i] += y[i]; 711 } 712 } 713 714 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 715 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 716 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 717 PetscFunctionReturn(0); 718 } 719 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 720 721 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 722 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 723 { 724 Mat_SeqAIJMKL *a, *b, *c; 725 sparse_matrix_t csrA, csrB, csrC; 726 PetscErrorCode ierr; 727 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 728 struct matrix_descr descr_type_gen; 729 PetscObjectState state; 730 731 PetscFunctionBegin; 732 a = (Mat_SeqAIJMKL*)A->spptr; 733 b = (Mat_SeqAIJMKL*)B->spptr; 734 c = (Mat_SeqAIJMKL*)C->spptr; 735 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 736 if (!a->sparse_optimized || a->state != state) { 737 MatSeqAIJMKL_create_mkl_handle(A); 738 } 739 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 740 if (!b->sparse_optimized || b->state != state) { 741 MatSeqAIJMKL_create_mkl_handle(B); 742 } 743 csrA = a->csrA; 744 csrB = b->csrA; 745 csrC = c->csrA; 746 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 747 748 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 749 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 750 SPARSE_STAGE_FINALIZE_MULT,&csrC); 751 752 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 753 754 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 755 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 756 757 PetscFunctionReturn(0); 758 } 759 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 760 761 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 762 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 763 { 764 Mat_SeqAIJMKL *a, *p, *c; 765 sparse_matrix_t csrA, csrP, csrC; 766 PetscBool set, flag; 767 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 768 struct matrix_descr descr_type_sym; 769 PetscObjectState state; 770 PetscErrorCode ierr; 771 772 PetscFunctionBegin; 773 ierr = MatIsSymmetricKnown(A,&set,&flag); 774 if (!set || (set && !flag)) { 775 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 a = (Mat_SeqAIJMKL*)A->spptr; 780 p = (Mat_SeqAIJMKL*)P->spptr; 781 c = (Mat_SeqAIJMKL*)C->spptr; 782 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 783 if (!a->sparse_optimized || a->state != state) { 784 MatSeqAIJMKL_create_mkl_handle(A); 785 } 786 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 787 if (!p->sparse_optimized || p->state != state) { 788 MatSeqAIJMKL_create_mkl_handle(P); 789 } 790 csrA = a->csrA; 791 csrP = p->csrA; 792 csrC = c->csrA; 793 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 794 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 795 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 796 797 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 798 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 799 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 800 801 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 802 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 803 804 PetscFunctionReturn(0); 805 } 806 #endif 807 808 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 809 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 810 * routine, but can also be used to convert an assembled SeqAIJ matrix 811 * into a SeqAIJMKL one. */ 812 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 813 { 814 PetscErrorCode ierr; 815 Mat B = *newmat; 816 Mat_SeqAIJMKL *aijmkl; 817 PetscBool set; 818 PetscBool sametype; 819 820 PetscFunctionBegin; 821 if (reuse == MAT_INITIAL_MATRIX) { 822 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 823 } 824 825 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 826 if (sametype) PetscFunctionReturn(0); 827 828 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 829 B->spptr = (void*) aijmkl; 830 831 /* Set function pointers for methods that we inherit from AIJ but override. 832 * We also parse some command line options below, since those determine some of the methods we point to. */ 833 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 834 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 835 B->ops->destroy = MatDestroy_SeqAIJMKL; 836 837 aijmkl->sparse_optimized = PETSC_FALSE; 838 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 839 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 840 #else 841 aijmkl->no_SpMV2 = PETSC_TRUE; 842 #endif 843 aijmkl->eager_inspection = PETSC_FALSE; 844 845 /* Parse command line options. */ 846 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 847 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 848 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 849 ierr = PetscOptionsEnd();CHKERRQ(ierr); 850 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 851 if(!aijmkl->no_SpMV2) { 852 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 853 aijmkl->no_SpMV2 = PETSC_TRUE; 854 } 855 #endif 856 857 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 858 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 859 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 860 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 861 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 862 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 863 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 864 # if !defined(PETSC_USE_COMPLEX) 865 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 866 # endif 867 # endif 868 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 869 870 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 871 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 872 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 873 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 874 * versions in which the older interface has not been deprecated, we use the old interface. */ 875 if (aijmkl->no_SpMV2) { 876 B->ops->mult = MatMult_SeqAIJMKL; 877 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 878 B->ops->multadd = MatMultAdd_SeqAIJMKL; 879 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 880 } 881 #endif 882 883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 884 885 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 886 *newmat = B; 887 PetscFunctionReturn(0); 888 } 889 890 /*@C 891 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 892 This type inherits from AIJ and is largely identical, but uses sparse BLAS 893 routines from Intel MKL whenever possible. 894 If the installed version of MKL supports the "SpMV2" sparse 895 inspector-executor routines, then those are used by default. 896 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 897 symmetric A) operations are currently supported. 898 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 899 900 Collective 901 902 Input Parameters: 903 + comm - MPI communicator, set to PETSC_COMM_SELF 904 . m - number of rows 905 . n - number of columns 906 . nz - number of nonzeros per row (same for all rows) 907 - nnz - array containing the number of nonzeros in the various rows 908 (possibly different for each row) or NULL 909 910 Output Parameter: 911 . A - the matrix 912 913 Options Database Keys: 914 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 915 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 916 917 Notes: 918 If nnz is given then nz is ignored 919 920 Level: intermediate 921 922 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 923 @*/ 924 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 925 { 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 ierr = MatCreate(comm,A);CHKERRQ(ierr); 930 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 931 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 932 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 937 { 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 942 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945