1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 51 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 53 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 55 56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58 * simply involves destroying the MKL sparse matrix handle and then freeing 59 * the spptr pointer. */ 60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61 62 if (aijmkl->sparse_optimized) { 63 sparse_status_t stat; 64 stat = mkl_sparse_destroy(aijmkl->csrA); 65 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 66 } 67 #endif 68 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 69 70 /* Change the type of B to MATSEQAIJ. */ 71 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 72 73 *newmat = B; 74 PetscFunctionReturn(0); 75 } 76 77 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 78 { 79 PetscErrorCode ierr; 80 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 81 82 PetscFunctionBegin; 83 84 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 85 * spptr pointer. */ 86 if (aijmkl) { 87 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 88 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 89 if (aijmkl->sparse_optimized) { 90 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 91 stat = mkl_sparse_destroy(aijmkl->csrA); 92 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 93 } 94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 95 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 96 } 97 98 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 99 * to destroy everything that remains. */ 100 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 101 /* Note that I don't call MatSetType(). I believe this is because that 102 * is only to be called when *building* a matrix. I could be wrong, but 103 * that is how things work for the SuperLU matrix class. */ 104 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 109 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 110 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 111 * handle, creates a new one, and then calls mkl_sparse_optimize(). 112 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 113 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 114 * an unoptimized matrix handle here. */ 115 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 116 { 117 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 118 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 119 * does nothing. We make it callable anyway in this case because it cuts 120 * down on littering the code with #ifdefs. */ 121 PetscFunctionBegin; 122 PetscFunctionReturn(0); 123 #else 124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 125 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 126 PetscInt m,n; 127 MatScalar *aa; 128 PetscInt *aj,*ai; 129 sparse_status_t stat; 130 PetscErrorCode ierr; 131 132 PetscFunctionBegin; 133 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 134 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 135 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 136 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 137 * mkl_sparse_optimize() later. */ 138 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 139 #endif 140 141 if (aijmkl->sparse_optimized) { 142 /* Matrix has been previously assembled and optimized. Must destroy old 143 * matrix handle before running the optimization step again. */ 144 stat = mkl_sparse_destroy(aijmkl->csrA); 145 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 146 } 147 aijmkl->sparse_optimized = PETSC_FALSE; 148 149 /* Now perform the SpMV2 setup and matrix optimization. */ 150 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 151 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 152 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 153 m = A->rmap->n; 154 n = A->cmap->n; 155 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 156 aa = a->a; /* Nonzero elements stored row-by-row. */ 157 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 158 if ((a->nz!=0) && aa && !(A->structure_only)) { 159 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 160 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 161 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 162 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 163 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 164 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 165 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 166 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 167 if (!aijmkl->no_SpMV2) { 168 stat = mkl_sparse_optimize(aijmkl->csrA); 169 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 170 } 171 aijmkl->sparse_optimized = PETSC_TRUE; 172 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 173 } 174 175 PetscFunctionReturn(0); 176 #endif 177 } 178 179 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 180 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 181 * matrix handle. 182 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 183 * there is no good alternative. */ 184 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 185 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 186 { 187 PetscErrorCode ierr; 188 sparse_status_t stat; 189 sparse_index_base_t indexing; 190 PetscInt nrows, ncols; 191 PetscInt *aj,*ai,*dummy; 192 MatScalar *aa; 193 Mat A; 194 Mat_SeqAIJMKL *aijmkl; 195 196 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 197 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 198 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 199 200 if (reuse == MAT_REUSE_MATRIX) { 201 ierr = MatDestroy(mat);CHKERRQ(ierr); 202 } 203 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 204 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 205 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 206 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 207 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 208 * they will be destroyed when the MKL handle is destroyed. 209 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 210 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 211 212 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 213 * Now turn it into a MATSEQAIJMKL. */ 214 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 215 216 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 217 aijmkl->csrA = csrA; 218 219 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 220 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 221 * and just need to be able to run the MKL optimization step. */ 222 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 223 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 224 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 225 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 226 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 227 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 228 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 229 if (!aijmkl->no_SpMV2) { 230 stat = mkl_sparse_optimize(aijmkl->csrA); 231 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 232 } 233 aijmkl->sparse_optimized = PETSC_TRUE; 234 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 235 236 *mat = A; 237 PetscFunctionReturn(0); 238 } 239 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 240 241 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 242 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 243 * MatMatMultNumeric(). */ 244 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 245 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 246 { 247 PetscInt i; 248 PetscInt nrows,ncols; 249 PetscInt nz; 250 PetscInt *ai,*aj,*dummy; 251 PetscScalar *aa; 252 PetscErrorCode ierr; 253 Mat_SeqAIJMKL *aijmkl; 254 sparse_status_t stat; 255 sparse_index_base_t indexing; 256 257 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 258 259 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 260 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 261 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 262 263 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 264 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 265 for (i=0; i<nrows; i++) { 266 nz = ai[i+1] - ai[i]; 267 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 268 } 269 270 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 273 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 274 /* We mark our matrix as having a valid, optimized MKL handle. 275 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 276 aijmkl->sparse_optimized = PETSC_TRUE; 277 278 PetscFunctionReturn(0); 279 } 280 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 281 282 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 283 { 284 PetscErrorCode ierr; 285 Mat_SeqAIJMKL *aijmkl; 286 Mat_SeqAIJMKL *aijmkl_dest; 287 288 PetscFunctionBegin; 289 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 290 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 291 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 292 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 293 aijmkl_dest->sparse_optimized = PETSC_FALSE; 294 if (aijmkl->eager_inspection) { 295 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 301 { 302 PetscErrorCode ierr; 303 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 304 Mat_SeqAIJMKL *aijmkl; 305 306 PetscFunctionBegin; 307 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 308 309 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 310 * extra information and some different methods, call the AssemblyEnd 311 * routine for a MATSEQAIJ. 312 * I'm not sure if this is the best way to do this, but it avoids 313 * a lot of code duplication. */ 314 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 315 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 316 317 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 318 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 319 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 320 if (aijmkl->eager_inspection) { 321 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 322 } 323 324 PetscFunctionReturn(0); 325 } 326 327 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 328 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 329 { 330 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 331 const PetscScalar *x; 332 PetscScalar *y; 333 const MatScalar *aa; 334 PetscErrorCode ierr; 335 PetscInt m=A->rmap->n; 336 PetscInt n=A->cmap->n; 337 PetscScalar alpha = 1.0; 338 PetscScalar beta = 0.0; 339 const PetscInt *aj,*ai; 340 char matdescra[6]; 341 342 343 /* Variables not in MatMult_SeqAIJ. */ 344 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 345 346 PetscFunctionBegin; 347 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 348 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 349 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 350 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 351 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 352 aa = a->a; /* Nonzero elements stored row-by-row. */ 353 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 354 355 /* Call MKL sparse BLAS routine to do the MatMult. */ 356 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 357 358 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 360 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 #endif 364 365 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 366 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 367 { 368 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 369 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 370 const PetscScalar *x; 371 PetscScalar *y; 372 PetscErrorCode ierr; 373 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 374 PetscObjectState state; 375 376 PetscFunctionBegin; 377 378 /* If there are no nonzero entries, zero yy and return immediately. */ 379 if(!a->nz) { 380 PetscInt i; 381 PetscInt m=A->rmap->n; 382 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 383 for (i=0; i<m; i++) { 384 y[i] = 0.0; 385 } 386 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 391 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 392 393 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 394 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 395 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 396 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 397 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 398 MatSeqAIJMKL_create_mkl_handle(A); 399 } 400 401 /* Call MKL SpMV2 executor routine to do the MatMult. */ 402 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 403 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 404 405 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 407 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 411 412 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 413 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 414 { 415 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 416 const PetscScalar *x; 417 PetscScalar *y; 418 const MatScalar *aa; 419 PetscErrorCode ierr; 420 PetscInt m=A->rmap->n; 421 PetscInt n=A->cmap->n; 422 PetscScalar alpha = 1.0; 423 PetscScalar beta = 0.0; 424 const PetscInt *aj,*ai; 425 char matdescra[6]; 426 427 /* Variables not in MatMultTranspose_SeqAIJ. */ 428 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 429 430 PetscFunctionBegin; 431 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 432 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 433 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 434 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 435 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 436 aa = a->a; /* Nonzero elements stored row-by-row. */ 437 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 438 439 /* Call MKL sparse BLAS routine to do the MatMult. */ 440 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 441 442 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 443 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 #endif 448 449 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 450 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 451 { 452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 453 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 454 const PetscScalar *x; 455 PetscScalar *y; 456 PetscErrorCode ierr; 457 sparse_status_t stat; 458 PetscObjectState state; 459 460 PetscFunctionBegin; 461 462 /* If there are no nonzero entries, zero yy and return immediately. */ 463 if(!a->nz) { 464 PetscInt i; 465 PetscInt n=A->cmap->n; 466 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 467 for (i=0; i<n; i++) { 468 y[i] = 0.0; 469 } 470 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 475 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 476 477 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 478 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 479 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 480 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 481 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 482 MatSeqAIJMKL_create_mkl_handle(A); 483 } 484 485 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 486 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 487 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 488 489 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 491 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 495 496 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 497 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 498 { 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 500 const PetscScalar *x; 501 PetscScalar *y,*z; 502 const MatScalar *aa; 503 PetscErrorCode ierr; 504 PetscInt m=A->rmap->n; 505 PetscInt n=A->cmap->n; 506 const PetscInt *aj,*ai; 507 PetscInt i; 508 509 /* Variables not in MatMultAdd_SeqAIJ. */ 510 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 511 PetscScalar alpha = 1.0; 512 PetscScalar beta; 513 char matdescra[6]; 514 515 PetscFunctionBegin; 516 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 517 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 518 519 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 520 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 521 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 522 aa = a->a; /* Nonzero elements stored row-by-row. */ 523 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 524 525 /* Call MKL sparse BLAS routine to do the MatMult. */ 526 if (zz == yy) { 527 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 528 beta = 1.0; 529 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 530 } else { 531 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 532 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 533 beta = 0.0; 534 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 535 for (i=0; i<m; i++) { 536 z[i] += y[i]; 537 } 538 } 539 540 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 541 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 542 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 #endif 546 547 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 548 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 549 { 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 551 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 552 const PetscScalar *x; 553 PetscScalar *y,*z; 554 PetscErrorCode ierr; 555 PetscInt m=A->rmap->n; 556 PetscInt i; 557 558 /* Variables not in MatMultAdd_SeqAIJ. */ 559 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 560 PetscObjectState state; 561 562 PetscFunctionBegin; 563 564 /* If there are no nonzero entries, set zz = yy and return immediately. */ 565 if(!a->nz) { 566 PetscInt i; 567 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 568 for (i=0; i<m; i++) { 569 z[i] = y[i]; 570 } 571 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 576 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 577 578 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 579 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 580 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 581 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 582 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 583 MatSeqAIJMKL_create_mkl_handle(A); 584 } 585 586 /* Call MKL sparse BLAS routine to do the MatMult. */ 587 if (zz == yy) { 588 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 589 * with alpha and beta both set to 1.0. */ 590 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 591 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 592 } else { 593 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 594 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 595 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 596 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 597 for (i=0; i<m; i++) { 598 z[i] += y[i]; 599 } 600 } 601 602 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 603 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 604 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 608 609 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 610 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 613 const PetscScalar *x; 614 PetscScalar *y,*z; 615 const MatScalar *aa; 616 PetscErrorCode ierr; 617 PetscInt m=A->rmap->n; 618 PetscInt n=A->cmap->n; 619 const PetscInt *aj,*ai; 620 PetscInt i; 621 622 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 623 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 624 PetscScalar alpha = 1.0; 625 PetscScalar beta; 626 char matdescra[6]; 627 628 PetscFunctionBegin; 629 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 630 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 631 632 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 633 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 634 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 635 aa = a->a; /* Nonzero elements stored row-by-row. */ 636 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 637 638 /* Call MKL sparse BLAS routine to do the MatMult. */ 639 if (zz == yy) { 640 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 641 beta = 1.0; 642 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 643 } else { 644 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 645 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 646 beta = 0.0; 647 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 648 for (i=0; i<n; i++) { 649 z[i] += y[i]; 650 } 651 } 652 653 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 654 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 655 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 656 PetscFunctionReturn(0); 657 } 658 #endif 659 660 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 662 { 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 664 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 665 const PetscScalar *x; 666 PetscScalar *y,*z; 667 PetscErrorCode ierr; 668 PetscInt n=A->cmap->n; 669 PetscInt i; 670 PetscObjectState state; 671 672 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 673 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 674 675 PetscFunctionBegin; 676 677 /* If there are no nonzero entries, set zz = yy and return immediately. */ 678 if(!a->nz) { 679 PetscInt i; 680 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 681 for (i=0; i<n; i++) { 682 z[i] = y[i]; 683 } 684 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 689 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 690 691 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 692 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 693 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 694 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 695 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 696 MatSeqAIJMKL_create_mkl_handle(A); 697 } 698 699 /* Call MKL sparse BLAS routine to do the MatMult. */ 700 if (zz == yy) { 701 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 702 * with alpha and beta both set to 1.0. */ 703 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 704 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 705 } else { 706 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 707 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 708 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 709 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 710 for (i=0; i<n; i++) { 711 z[i] += y[i]; 712 } 713 } 714 715 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 716 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 717 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 721 722 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 723 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,PetscReal fill,Mat C) 724 { 725 Mat_SeqAIJMKL *a, *b, *c; 726 sparse_matrix_t csrA, csrB, csrC; 727 PetscErrorCode ierr; 728 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 729 struct matrix_descr descr_type_gen; 730 PetscObjectState state; 731 732 PetscFunctionBegin; 733 a = (Mat_SeqAIJMKL*)A->spptr; 734 b = (Mat_SeqAIJMKL*)B->spptr; 735 c = (Mat_SeqAIJMKL*)C->spptr; 736 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 737 if (!a->sparse_optimized || a->state != state) { 738 MatSeqAIJMKL_create_mkl_handle(A); 739 } 740 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 741 if (!b->sparse_optimized || b->state != state) { 742 MatSeqAIJMKL_create_mkl_handle(B); 743 } 744 csrA = a->csrA; 745 csrB = b->csrA; 746 csrC = c->csrA; 747 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 748 749 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 750 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 751 SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 752 753 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply"); 754 755 PetscFunctionReturn(0); 756 } 757 758 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 759 { 760 Mat_SeqAIJMKL *a, *b, *c; 761 sparse_matrix_t csrA, csrB, csrC; 762 PetscErrorCode ierr; 763 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 764 struct matrix_descr descr_type_gen; 765 PetscObjectState state; 766 767 PetscFunctionBegin; 768 a = (Mat_SeqAIJMKL*)A->spptr; 769 b = (Mat_SeqAIJMKL*)B->spptr; 770 c = (Mat_SeqAIJMKL*)C->spptr; 771 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 772 if (!a->sparse_optimized || a->state != state) { 773 MatSeqAIJMKL_create_mkl_handle(A); 774 } 775 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 776 if (!b->sparse_optimized || b->state != state) { 777 MatSeqAIJMKL_create_mkl_handle(B); 778 } 779 csrA = a->csrA; 780 csrB = b->csrA; 781 csrC = c->csrA; 782 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 783 784 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 785 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 786 SPARSE_STAGE_FINALIZE_MULT,&csrC); 787 788 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 789 790 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 791 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 792 793 PetscFunctionReturn(0); 794 } 795 796 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 797 { 798 Mat_SeqAIJMKL *a, *p, *c; 799 sparse_matrix_t csrA, csrP, csrC; 800 PetscBool set, flag; 801 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 802 struct matrix_descr descr_type_sym; 803 PetscObjectState state; 804 PetscErrorCode ierr; 805 806 PetscFunctionBegin; 807 ierr = MatIsSymmetricKnown(A,&set,&flag); 808 if (!set || (set && !flag)) { 809 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 810 PetscFunctionReturn(0); 811 } 812 813 a = (Mat_SeqAIJMKL*)A->spptr; 814 p = (Mat_SeqAIJMKL*)P->spptr; 815 c = (Mat_SeqAIJMKL*)C->spptr; 816 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 817 if (!a->sparse_optimized || a->state != state) { 818 MatSeqAIJMKL_create_mkl_handle(A); 819 } 820 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 821 if (!p->sparse_optimized || p->state != state) { 822 MatSeqAIJMKL_create_mkl_handle(P); 823 } 824 csrA = a->csrA; 825 csrP = p->csrA; 826 csrC = c->csrA; 827 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 828 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 829 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 830 831 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 832 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 833 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 834 835 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 836 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 837 838 PetscFunctionReturn(0); 839 } 840 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 841 842 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 843 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 844 * routine, but can also be used to convert an assembled SeqAIJ matrix 845 * into a SeqAIJMKL one. */ 846 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 847 { 848 PetscErrorCode ierr; 849 Mat B = *newmat; 850 Mat_SeqAIJMKL *aijmkl; 851 PetscBool set; 852 PetscBool sametype; 853 854 PetscFunctionBegin; 855 if (reuse == MAT_INITIAL_MATRIX) { 856 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 857 } 858 859 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 860 if (sametype) PetscFunctionReturn(0); 861 862 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 863 B->spptr = (void*) aijmkl; 864 865 /* Set function pointers for methods that we inherit from AIJ but override. 866 * We also parse some command line options below, since those determine some of the methods we point to. */ 867 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 868 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 869 B->ops->destroy = MatDestroy_SeqAIJMKL; 870 871 aijmkl->sparse_optimized = PETSC_FALSE; 872 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 873 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 874 #else 875 aijmkl->no_SpMV2 = PETSC_TRUE; 876 #endif 877 aijmkl->eager_inspection = PETSC_FALSE; 878 879 /* Parse command line options. */ 880 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 881 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 882 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 883 ierr = PetscOptionsEnd();CHKERRQ(ierr); 884 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 885 if(!aijmkl->no_SpMV2) { 886 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 887 aijmkl->no_SpMV2 = PETSC_TRUE; 888 } 889 #endif 890 891 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 892 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 893 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 894 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 895 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 896 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 897 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2; 898 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 899 # if !defined(PETSC_USE_COMPLEX) 900 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 901 # endif 902 # endif 903 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 904 905 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 906 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 907 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 908 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 909 * versions in which the older interface has not been deprecated, we use the old interface. */ 910 if (aijmkl->no_SpMV2) { 911 B->ops->mult = MatMult_SeqAIJMKL; 912 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 913 B->ops->multadd = MatMultAdd_SeqAIJMKL; 914 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 915 } 916 #endif 917 918 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 919 920 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 921 *newmat = B; 922 PetscFunctionReturn(0); 923 } 924 925 /*@C 926 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 927 This type inherits from AIJ and is largely identical, but uses sparse BLAS 928 routines from Intel MKL whenever possible. 929 If the installed version of MKL supports the "SpMV2" sparse 930 inspector-executor routines, then those are used by default. 931 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 932 symmetric A) operations are currently supported. 933 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 934 935 Collective 936 937 Input Parameters: 938 + comm - MPI communicator, set to PETSC_COMM_SELF 939 . m - number of rows 940 . n - number of columns 941 . nz - number of nonzeros per row (same for all rows) 942 - nnz - array containing the number of nonzeros in the various rows 943 (possibly different for each row) or NULL 944 945 Output Parameter: 946 . A - the matrix 947 948 Options Database Keys: 949 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 950 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 951 952 Notes: 953 If nnz is given then nz is ignored 954 955 Level: intermediate 956 957 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 958 @*/ 959 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 960 { 961 PetscErrorCode ierr; 962 963 PetscFunctionBegin; 964 ierr = MatCreate(comm,A);CHKERRQ(ierr); 965 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 966 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 967 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 972 { 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 977 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980