1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr); 62 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 63 if(!aijmkl->no_SpMV2) { 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 66 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 67 #endif 68 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 69 } 70 71 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 72 * simply involves destroying the MKL sparse matrix handle and then freeing 73 * the spptr pointer. */ 74 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 75 76 if (aijmkl->sparse_optimized) { 77 sparse_status_t stat; 78 stat = mkl_sparse_destroy(aijmkl->csrA); 79 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 80 } 81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 82 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 83 84 /* Change the type of B to MATSEQAIJ. */ 85 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 86 87 *newmat = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 92 { 93 PetscErrorCode ierr; 94 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 95 96 PetscFunctionBegin; 97 98 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 99 * spptr pointer. */ 100 if (aijmkl) { 101 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 102 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 103 if (aijmkl->sparse_optimized) { 104 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 105 stat = mkl_sparse_destroy(aijmkl->csrA); 106 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 107 } 108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 109 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 110 } 111 112 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 113 * to destroy everything that remains. */ 114 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 115 /* Note that I don't call MatSetType(). I believe this is because that 116 * is only to be called when *building* a matrix. I could be wrong, but 117 * that is how things work for the SuperLU matrix class. */ 118 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 123 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 124 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 125 * handle, creates a new one, and then calls mkl_sparse_optimize(). 126 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 127 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 128 * an unoptimized matrix handle here. */ 129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 130 { 131 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 132 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 133 * does nothing. We make it callable anyway in this case because it cuts 134 * down on littering the code with #ifdefs. */ 135 PetscFunctionBegin; 136 PetscFunctionReturn(0); 137 #else 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 139 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 140 PetscInt m,n; 141 MatScalar *aa; 142 PetscInt *aj,*ai; 143 sparse_status_t stat; 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 148 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 149 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 150 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 151 * mkl_sparse_optimize() later. */ 152 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 153 #endif 154 155 if (aijmkl->sparse_optimized) { 156 /* Matrix has been previously assembled and optimized. Must destroy old 157 * matrix handle before running the optimization step again. */ 158 stat = mkl_sparse_destroy(aijmkl->csrA); 159 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 160 } 161 aijmkl->sparse_optimized = PETSC_FALSE; 162 163 /* Now perform the SpMV2 setup and matrix optimization. */ 164 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 165 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 166 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 167 m = A->rmap->n; 168 n = A->cmap->n; 169 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 170 aa = a->a; /* Nonzero elements stored row-by-row. */ 171 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 172 if ((a->nz!=0) & !(A->structure_only)) { 173 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 174 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 175 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 176 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 177 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 179 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 180 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 181 if (!aijmkl->no_SpMV2) { 182 stat = mkl_sparse_optimize(aijmkl->csrA); 183 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 184 } 185 aijmkl->sparse_optimized = PETSC_TRUE; 186 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 187 } 188 189 PetscFunctionReturn(0); 190 #endif 191 } 192 193 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 194 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 195 * matrix handle. 196 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 197 * there is no good alternative. */ 198 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 199 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 200 { 201 PetscErrorCode ierr; 202 sparse_status_t stat; 203 sparse_index_base_t indexing; 204 PetscInt nrows, ncols; 205 PetscInt *aj,*ai,*dummy; 206 MatScalar *aa; 207 Mat A; 208 Mat_SeqAIJMKL *aijmkl; 209 210 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 211 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 212 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 213 214 if (reuse == MAT_REUSE_MATRIX) { 215 ierr = MatDestroy(mat);CHKERRQ(ierr); 216 } 217 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 218 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 219 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 220 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 221 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 222 * they will be destroyed when the MKL handle is destroyed. 223 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 224 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 225 226 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 227 * Now turn it into a MATSEQAIJMKL. */ 228 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 229 230 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 231 aijmkl->csrA = csrA; 232 233 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 234 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 235 * and just need to be able to run the MKL optimization step. */ 236 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 237 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 238 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 239 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 240 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 241 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 242 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 243 if (!aijmkl->no_SpMV2) { 244 stat = mkl_sparse_optimize(aijmkl->csrA); 245 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 246 } 247 aijmkl->sparse_optimized = PETSC_TRUE; 248 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 249 250 *mat = A; 251 PetscFunctionReturn(0); 252 } 253 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 254 255 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 256 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 257 * MatMatMultNumeric(). */ 258 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 259 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 260 { 261 PetscInt i; 262 PetscInt nrows,ncols; 263 PetscInt nz; 264 PetscInt *ai,*aj,*dummy; 265 PetscScalar *aa; 266 PetscErrorCode ierr; 267 Mat_SeqAIJMKL *aijmkl; 268 sparse_status_t stat; 269 sparse_index_base_t indexing; 270 271 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 272 273 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 274 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 275 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 276 277 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 278 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 279 for (i=0; i<nrows; i++) { 280 nz = ai[i+1] - ai[i]; 281 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 282 } 283 284 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 287 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 288 /* We mark our matrix as having a valid, optimized MKL handle. 289 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 290 aijmkl->sparse_optimized = PETSC_TRUE; 291 292 PetscFunctionReturn(0); 293 } 294 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 295 296 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 297 { 298 PetscErrorCode ierr; 299 Mat_SeqAIJMKL *aijmkl; 300 Mat_SeqAIJMKL *aijmkl_dest; 301 302 PetscFunctionBegin; 303 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 304 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 305 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 306 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 307 aijmkl_dest->sparse_optimized = PETSC_FALSE; 308 if (aijmkl->eager_inspection) { 309 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 310 } 311 PetscFunctionReturn(0); 312 } 313 314 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 315 { 316 PetscErrorCode ierr; 317 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 318 Mat_SeqAIJMKL *aijmkl; 319 320 PetscFunctionBegin; 321 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 322 323 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 324 * extra information and some different methods, call the AssemblyEnd 325 * routine for a MATSEQAIJ. 326 * I'm not sure if this is the best way to do this, but it avoids 327 * a lot of code duplication. */ 328 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 329 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 330 331 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 332 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 333 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 334 if (aijmkl->eager_inspection) { 335 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 336 } 337 338 PetscFunctionReturn(0); 339 } 340 341 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 342 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 345 const PetscScalar *x; 346 PetscScalar *y; 347 const MatScalar *aa; 348 PetscErrorCode ierr; 349 PetscInt m=A->rmap->n; 350 PetscInt n=A->cmap->n; 351 PetscScalar alpha = 1.0; 352 PetscScalar beta = 0.0; 353 const PetscInt *aj,*ai; 354 char matdescra[6]; 355 356 357 /* Variables not in MatMult_SeqAIJ. */ 358 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 359 360 PetscFunctionBegin; 361 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 362 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 363 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 364 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 365 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 366 aa = a->a; /* Nonzero elements stored row-by-row. */ 367 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 368 369 /* Call MKL sparse BLAS routine to do the MatMult. */ 370 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 371 372 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 373 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 374 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 375 PetscFunctionReturn(0); 376 } 377 #endif 378 379 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 380 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 381 { 382 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 383 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 384 const PetscScalar *x; 385 PetscScalar *y; 386 PetscErrorCode ierr; 387 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 388 PetscObjectState state; 389 390 PetscFunctionBegin; 391 392 /* If there are no nonzero entries, zero yy and return immediately. */ 393 if(!a->nz) { 394 PetscInt i; 395 PetscInt m=A->rmap->n; 396 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 397 for (i=0; i<m; i++) { 398 y[i] = 0.0; 399 } 400 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 406 407 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 408 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 409 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 410 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 411 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 412 MatSeqAIJMKL_create_mkl_handle(A); 413 } 414 415 /* Call MKL SpMV2 executor routine to do the MatMult. */ 416 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 417 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 418 419 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 420 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 425 426 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 427 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 428 { 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 430 const PetscScalar *x; 431 PetscScalar *y; 432 const MatScalar *aa; 433 PetscErrorCode ierr; 434 PetscInt m=A->rmap->n; 435 PetscInt n=A->cmap->n; 436 PetscScalar alpha = 1.0; 437 PetscScalar beta = 0.0; 438 const PetscInt *aj,*ai; 439 char matdescra[6]; 440 441 /* Variables not in MatMultTranspose_SeqAIJ. */ 442 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 443 444 PetscFunctionBegin; 445 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 446 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 447 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 448 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 449 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 450 aa = a->a; /* Nonzero elements stored row-by-row. */ 451 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 452 453 /* Call MKL sparse BLAS routine to do the MatMult. */ 454 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 455 456 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 457 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 458 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 #endif 462 463 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 464 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 465 { 466 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 467 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 468 const PetscScalar *x; 469 PetscScalar *y; 470 PetscErrorCode ierr; 471 sparse_status_t stat; 472 PetscObjectState state; 473 474 PetscFunctionBegin; 475 476 /* If there are no nonzero entries, zero yy and return immediately. */ 477 if(!a->nz) { 478 PetscInt i; 479 PetscInt n=A->cmap->n; 480 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 481 for (i=0; i<n; i++) { 482 y[i] = 0.0; 483 } 484 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 489 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 490 491 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 492 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 493 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 494 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 495 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 496 MatSeqAIJMKL_create_mkl_handle(A); 497 } 498 499 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 500 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 501 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 502 503 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 505 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 509 510 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 511 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 512 { 513 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 514 const PetscScalar *x; 515 PetscScalar *y,*z; 516 const MatScalar *aa; 517 PetscErrorCode ierr; 518 PetscInt m=A->rmap->n; 519 PetscInt n=A->cmap->n; 520 const PetscInt *aj,*ai; 521 PetscInt i; 522 523 /* Variables not in MatMultAdd_SeqAIJ. */ 524 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 525 PetscScalar alpha = 1.0; 526 PetscScalar beta; 527 char matdescra[6]; 528 529 PetscFunctionBegin; 530 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 531 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 532 533 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 534 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 535 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 536 aa = a->a; /* Nonzero elements stored row-by-row. */ 537 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 538 539 /* Call MKL sparse BLAS routine to do the MatMult. */ 540 if (zz == yy) { 541 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 542 beta = 1.0; 543 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 544 } else { 545 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 546 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 547 beta = 0.0; 548 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 549 for (i=0; i<m; i++) { 550 z[i] += y[i]; 551 } 552 } 553 554 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 555 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 #endif 560 561 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 562 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 563 { 564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 565 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 566 const PetscScalar *x; 567 PetscScalar *y,*z; 568 PetscErrorCode ierr; 569 PetscInt m=A->rmap->n; 570 PetscInt i; 571 572 /* Variables not in MatMultAdd_SeqAIJ. */ 573 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 574 PetscObjectState state; 575 576 PetscFunctionBegin; 577 578 /* If there are no nonzero entries, set zz = yy and return immediately. */ 579 if(!a->nz) { 580 PetscInt i; 581 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 582 for (i=0; i<m; i++) { 583 z[i] = y[i]; 584 } 585 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 590 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 591 592 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 593 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 594 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 595 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 596 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 597 MatSeqAIJMKL_create_mkl_handle(A); 598 } 599 600 /* Call MKL sparse BLAS routine to do the MatMult. */ 601 if (zz == yy) { 602 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 603 * with alpha and beta both set to 1.0. */ 604 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 605 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 606 } else { 607 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 608 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 609 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 610 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 611 for (i=0; i<m; i++) { 612 z[i] += y[i]; 613 } 614 } 615 616 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 617 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 618 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 622 623 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 624 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 625 { 626 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 627 const PetscScalar *x; 628 PetscScalar *y,*z; 629 const MatScalar *aa; 630 PetscErrorCode ierr; 631 PetscInt m=A->rmap->n; 632 PetscInt n=A->cmap->n; 633 const PetscInt *aj,*ai; 634 PetscInt i; 635 636 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 637 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 638 PetscScalar alpha = 1.0; 639 PetscScalar beta; 640 char matdescra[6]; 641 642 PetscFunctionBegin; 643 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 644 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 645 646 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 647 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 648 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 649 aa = a->a; /* Nonzero elements stored row-by-row. */ 650 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 651 652 /* Call MKL sparse BLAS routine to do the MatMult. */ 653 if (zz == yy) { 654 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 655 beta = 1.0; 656 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 657 } else { 658 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 659 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 660 beta = 0.0; 661 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 662 for (i=0; i<n; i++) { 663 z[i] += y[i]; 664 } 665 } 666 667 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 668 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 669 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 #endif 673 674 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 675 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 676 { 677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 678 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 679 const PetscScalar *x; 680 PetscScalar *y,*z; 681 PetscErrorCode ierr; 682 PetscInt n=A->cmap->n; 683 PetscInt i; 684 PetscObjectState state; 685 686 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 687 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 688 689 PetscFunctionBegin; 690 691 /* If there are no nonzero entries, set zz = yy and return immediately. */ 692 if(!a->nz) { 693 PetscInt i; 694 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 695 for (i=0; i<n; i++) { 696 z[i] = y[i]; 697 } 698 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 703 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 704 705 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 706 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 707 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 708 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 709 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 710 MatSeqAIJMKL_create_mkl_handle(A); 711 } 712 713 /* Call MKL sparse BLAS routine to do the MatMult. */ 714 if (zz == yy) { 715 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 716 * with alpha and beta both set to 1.0. */ 717 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 718 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 719 } else { 720 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 721 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 722 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 723 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 724 for (i=0; i<n; i++) { 725 z[i] += y[i]; 726 } 727 } 728 729 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 730 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 731 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 732 PetscFunctionReturn(0); 733 } 734 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 735 736 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 737 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 738 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 739 * For releases of MKL prior to version 18, update 2: 740 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 741 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 742 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 743 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 744 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 745 { 746 Mat_SeqAIJMKL *a, *b; 747 sparse_matrix_t csrA, csrB, csrC; 748 PetscErrorCode ierr; 749 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 750 PetscObjectState state; 751 752 PetscFunctionBegin; 753 a = (Mat_SeqAIJMKL*)A->spptr; 754 b = (Mat_SeqAIJMKL*)B->spptr; 755 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 756 if (!a->sparse_optimized || a->state != state) { 757 MatSeqAIJMKL_create_mkl_handle(A); 758 } 759 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 760 if (!b->sparse_optimized || b->state != state) { 761 MatSeqAIJMKL_create_mkl_handle(B); 762 } 763 csrA = a->csrA; 764 csrB = b->csrA; 765 766 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 767 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 768 769 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 770 771 PetscFunctionReturn(0); 772 } 773 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 774 775 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 776 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 777 { 778 Mat_SeqAIJMKL *a, *b, *c; 779 sparse_matrix_t csrA, csrB, csrC; 780 PetscErrorCode ierr; 781 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 782 struct matrix_descr descr_type_gen; 783 PetscObjectState state; 784 785 PetscFunctionBegin; 786 a = (Mat_SeqAIJMKL*)A->spptr; 787 b = (Mat_SeqAIJMKL*)B->spptr; 788 c = (Mat_SeqAIJMKL*)C->spptr; 789 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 790 if (!a->sparse_optimized || a->state != state) { 791 MatSeqAIJMKL_create_mkl_handle(A); 792 } 793 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 794 if (!b->sparse_optimized || b->state != state) { 795 MatSeqAIJMKL_create_mkl_handle(B); 796 } 797 csrA = a->csrA; 798 csrB = b->csrA; 799 csrC = c->csrA; 800 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 801 802 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 803 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 804 SPARSE_STAGE_FINALIZE_MULT,&csrC); 805 806 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 807 808 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 809 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 810 811 PetscFunctionReturn(0); 812 } 813 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 814 815 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 816 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 817 { 818 Mat_SeqAIJMKL *a, *b; 819 sparse_matrix_t csrA, csrB, csrC; 820 PetscErrorCode ierr; 821 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 822 PetscObjectState state; 823 824 PetscFunctionBegin; 825 a = (Mat_SeqAIJMKL*)A->spptr; 826 b = (Mat_SeqAIJMKL*)B->spptr; 827 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 828 if (!a->sparse_optimized || a->state != state) { 829 MatSeqAIJMKL_create_mkl_handle(A); 830 } 831 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 832 if (!b->sparse_optimized || b->state != state) { 833 MatSeqAIJMKL_create_mkl_handle(B); 834 } 835 csrA = a->csrA; 836 csrB = b->csrA; 837 838 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 839 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 840 841 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 842 843 PetscFunctionReturn(0); 844 } 845 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 846 847 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 848 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 849 { 850 Mat_SeqAIJMKL *a, *p, *c; 851 sparse_matrix_t csrA, csrP, csrC; 852 PetscBool set, flag; 853 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 854 struct matrix_descr descr_type_sym; 855 PetscObjectState state; 856 PetscErrorCode ierr; 857 858 PetscFunctionBegin; 859 ierr = MatIsSymmetricKnown(A,&set,&flag); 860 if (!set || (set && !flag)) { 861 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 a = (Mat_SeqAIJMKL*)A->spptr; 866 p = (Mat_SeqAIJMKL*)P->spptr; 867 c = (Mat_SeqAIJMKL*)C->spptr; 868 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 869 if (!a->sparse_optimized || a->state != state) { 870 MatSeqAIJMKL_create_mkl_handle(A); 871 } 872 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 873 if (!p->sparse_optimized || p->state != state) { 874 MatSeqAIJMKL_create_mkl_handle(P); 875 } 876 csrA = a->csrA; 877 csrP = p->csrA; 878 csrC = c->csrA; 879 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 880 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 881 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 882 883 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 884 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 885 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 886 887 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 888 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 889 890 PetscFunctionReturn(0); 891 } 892 #endif 893 894 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 895 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 896 { 897 Mat_SeqAIJMKL *a, *p; 898 sparse_matrix_t csrA, csrP, csrC; 899 PetscBool set, flag; 900 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 901 struct matrix_descr descr_type_sym; 902 PetscObjectState state; 903 PetscErrorCode ierr; 904 905 PetscFunctionBegin; 906 ierr = MatIsSymmetricKnown(A,&set,&flag); 907 if (!set || (set && !flag)) { 908 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 if (scall == MAT_REUSE_MATRIX) { 913 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916 917 a = (Mat_SeqAIJMKL*)A->spptr; 918 p = (Mat_SeqAIJMKL*)P->spptr; 919 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 920 if (!a->sparse_optimized || a->state != state) { 921 MatSeqAIJMKL_create_mkl_handle(A); 922 } 923 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 924 if (!p->sparse_optimized || p->state != state) { 925 MatSeqAIJMKL_create_mkl_handle(P); 926 } 927 csrA = a->csrA; 928 csrP = p->csrA; 929 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 930 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 931 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 932 933 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 934 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT); 935 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 936 937 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 938 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 939 940 PetscFunctionReturn(0); 941 } 942 #endif 943 944 /* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */ 945 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 946 947 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 948 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 949 * routine, but can also be used to convert an assembled SeqAIJ matrix 950 * into a SeqAIJMKL one. */ 951 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 952 { 953 PetscErrorCode ierr; 954 Mat B = *newmat; 955 Mat_SeqAIJMKL *aijmkl; 956 PetscBool set; 957 PetscBool sametype; 958 959 PetscFunctionBegin; 960 if (reuse == MAT_INITIAL_MATRIX) { 961 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 962 } 963 964 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 965 if (sametype) PetscFunctionReturn(0); 966 967 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 968 B->spptr = (void*) aijmkl; 969 970 /* Set function pointers for methods that we inherit from AIJ but override. 971 * We also parse some command line options below, since those determine some of the methods we point to. */ 972 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 973 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 974 B->ops->destroy = MatDestroy_SeqAIJMKL; 975 976 aijmkl->sparse_optimized = PETSC_FALSE; 977 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 978 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 979 #else 980 aijmkl->no_SpMV2 = PETSC_TRUE; 981 #endif 982 aijmkl->eager_inspection = PETSC_FALSE; 983 984 /* Parse command line options. */ 985 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 986 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 987 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 988 ierr = PetscOptionsEnd();CHKERRQ(ierr); 989 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 990 if(!aijmkl->no_SpMV2) { 991 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 992 aijmkl->no_SpMV2 = PETSC_TRUE; 993 } 994 #endif 995 996 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 997 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 998 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 999 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1000 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1001 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1002 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1003 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1004 # if !defined(PETSC_USE_COMPLEX) 1005 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 1006 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1007 # endif 1008 # endif 1009 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1010 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1011 1012 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1013 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1014 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1015 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1016 * versions in which the older interface has not been deprecated, we use the old interface. */ 1017 if (aijmkl->no_SpMV2) { 1018 B->ops->mult = MatMult_SeqAIJMKL; 1019 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1020 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1021 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1022 } 1023 #endif 1024 1025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 1030 if(!aijmkl->no_SpMV2) { 1031 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1033 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1035 #endif 1036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1037 #endif 1038 } 1039 1040 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1041 *newmat = B; 1042 PetscFunctionReturn(0); 1043 } 1044 1045 /*@C 1046 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1047 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1048 routines from Intel MKL whenever possible. 1049 If the installed version of MKL supports the "SpMV2" sparse 1050 inspector-executor routines, then those are used by default. 1051 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1052 symmetric A) operations are currently supported. 1053 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1054 1055 Collective 1056 1057 Input Parameters: 1058 + comm - MPI communicator, set to PETSC_COMM_SELF 1059 . m - number of rows 1060 . n - number of columns 1061 . nz - number of nonzeros per row (same for all rows) 1062 - nnz - array containing the number of nonzeros in the various rows 1063 (possibly different for each row) or NULL 1064 1065 Output Parameter: 1066 . A - the matrix 1067 1068 Options Database Keys: 1069 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1070 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1071 1072 Notes: 1073 If nnz is given then nz is ignored 1074 1075 Level: intermediate 1076 1077 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1078 @*/ 1079 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1080 { 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1085 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1086 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1087 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1092 { 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1097 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100