1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr); 62 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 63 if(!aijmkl->no_SpMV2) { 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M) 66 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 67 #endif 68 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 69 } 70 71 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 72 * simply involves destroying the MKL sparse matrix handle and then freeing 73 * the spptr pointer. */ 74 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 75 76 if (aijmkl->sparse_optimized) { 77 sparse_status_t stat; 78 stat = mkl_sparse_destroy(aijmkl->csrA); 79 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 80 } 81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 82 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 83 84 /* Change the type of B to MATSEQAIJ. */ 85 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 86 87 *newmat = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 92 { 93 PetscErrorCode ierr; 94 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 95 96 PetscFunctionBegin; 97 98 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 99 * spptr pointer. */ 100 if (aijmkl) { 101 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 102 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 103 if (aijmkl->sparse_optimized) { 104 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 105 stat = mkl_sparse_destroy(aijmkl->csrA); 106 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 107 } 108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 109 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 110 } 111 112 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 113 * to destroy everything that remains. */ 114 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 115 /* Note that I don't call MatSetType(). I believe this is because that 116 * is only to be called when *building* a matrix. I could be wrong, but 117 * that is how things work for the SuperLU matrix class. */ 118 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 123 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 124 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 125 * handle, creates a new one, and then calls mkl_sparse_optimize(). 126 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 127 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 128 * an unoptimized matrix handle here. */ 129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 130 { 131 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 132 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 133 * does nothing. We make it callable anyway in this case because it cuts 134 * down on littering the code with #ifdefs. */ 135 PetscFunctionBegin; 136 PetscFunctionReturn(0); 137 #else 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 139 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 140 PetscInt m,n; 141 MatScalar *aa; 142 PetscInt *aj,*ai; 143 sparse_status_t stat; 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 148 149 if (aijmkl->sparse_optimized) { 150 /* Matrix has been previously assembled and optimized. Must destroy old 151 * matrix handle before running the optimization step again. */ 152 stat = mkl_sparse_destroy(aijmkl->csrA); 153 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 154 } 155 aijmkl->sparse_optimized = PETSC_FALSE; 156 157 /* Now perform the SpMV2 setup and matrix optimization. */ 158 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 159 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 160 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 161 m = A->rmap->n; 162 n = A->cmap->n; 163 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 164 aa = a->a; /* Nonzero elements stored row-by-row. */ 165 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 166 if ((a->nz!=0) & !(A->structure_only)) { 167 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 168 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 169 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 170 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 171 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 172 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 173 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 174 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 175 if (!aijmkl->no_SpMV2) { 176 stat = mkl_sparse_optimize(aijmkl->csrA); 177 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 178 } 179 aijmkl->sparse_optimized = PETSC_TRUE; 180 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 181 } 182 183 PetscFunctionReturn(0); 184 #endif 185 } 186 187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 188 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 189 * matrix handle. 190 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 191 * there is no good alternative. */ 192 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 193 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 194 { 195 PetscErrorCode ierr; 196 sparse_status_t stat; 197 sparse_index_base_t indexing; 198 PetscInt nrows, ncols; 199 PetscInt *aj,*ai,*dummy; 200 MatScalar *aa; 201 Mat A; 202 Mat_SeqAIJMKL *aijmkl; 203 204 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 205 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 206 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 207 208 if (reuse == MAT_REUSE_MATRIX) { 209 ierr = MatDestroy(mat);CHKERRQ(ierr); 210 } 211 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 212 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 213 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 214 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 215 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 216 * they will be destroyed when the MKL handle is destroyed. 217 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 218 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 219 220 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 221 * Now turn it into a MATSEQAIJMKL. */ 222 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 223 224 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 225 aijmkl->csrA = csrA; 226 227 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 228 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 229 * and just need to be able to run the MKL optimization step. */ 230 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 231 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 232 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 233 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 234 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 235 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 236 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 237 if (!aijmkl->no_SpMV2) { 238 stat = mkl_sparse_optimize(aijmkl->csrA); 239 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 240 } 241 aijmkl->sparse_optimized = PETSC_TRUE; 242 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 243 244 *mat = A; 245 PetscFunctionReturn(0); 246 } 247 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 248 249 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 250 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 251 * MatMatMultNumeric(). */ 252 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 253 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 254 { 255 PetscInt i; 256 PetscInt nrows,ncols; 257 PetscInt nz; 258 PetscInt *ai,*aj,*dummy; 259 PetscScalar *aa; 260 PetscErrorCode ierr; 261 Mat_SeqAIJMKL *aijmkl; 262 sparse_status_t stat; 263 sparse_index_base_t indexing; 264 265 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 266 267 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 268 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 269 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 270 271 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 272 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 273 for (i=0; i<nrows; i++) { 274 nz = ai[i+1] - ai[i]; 275 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 276 } 277 278 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 279 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 280 281 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 282 /* We mark our matrix as having a valid, optimized MKL handle. 283 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 284 aijmkl->sparse_optimized = PETSC_TRUE; 285 286 PetscFunctionReturn(0); 287 } 288 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 289 290 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 291 { 292 PetscErrorCode ierr; 293 Mat_SeqAIJMKL *aijmkl; 294 Mat_SeqAIJMKL *aijmkl_dest; 295 296 PetscFunctionBegin; 297 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 298 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 299 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 300 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 301 aijmkl_dest->sparse_optimized = PETSC_FALSE; 302 if (aijmkl->eager_inspection) { 303 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 304 } 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 309 { 310 PetscErrorCode ierr; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 312 Mat_SeqAIJMKL *aijmkl; 313 314 PetscFunctionBegin; 315 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 316 317 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 318 * extra information and some different methods, call the AssemblyEnd 319 * routine for a MATSEQAIJ. 320 * I'm not sure if this is the best way to do this, but it avoids 321 * a lot of code duplication. */ 322 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 323 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 324 325 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 326 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 327 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 328 if (aijmkl->eager_inspection) { 329 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 330 } 331 332 PetscFunctionReturn(0); 333 } 334 335 #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M) 336 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 337 { 338 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 339 const PetscScalar *x; 340 PetscScalar *y; 341 const MatScalar *aa; 342 PetscErrorCode ierr; 343 PetscInt m=A->rmap->n; 344 PetscInt n=A->cmap->n; 345 PetscScalar alpha = 1.0; 346 PetscScalar beta = 0.0; 347 const PetscInt *aj,*ai; 348 char matdescra[6]; 349 350 351 /* Variables not in MatMult_SeqAIJ. */ 352 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 353 354 PetscFunctionBegin; 355 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 356 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 357 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 359 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 360 aa = a->a; /* Nonzero elements stored row-by-row. */ 361 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 362 363 /* Call MKL sparse BLAS routine to do the MatMult. */ 364 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 365 366 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 367 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 368 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 374 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 375 { 376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 377 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 378 const PetscScalar *x; 379 PetscScalar *y; 380 PetscErrorCode ierr; 381 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 382 PetscObjectState state; 383 384 PetscFunctionBegin; 385 386 /* If there are no nonzero entries, zero yy and return immediately. */ 387 if(!a->nz) { 388 PetscInt i; 389 PetscInt m=A->rmap->n; 390 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 391 for (i=0; i<m; i++) { 392 y[i] = 0.0; 393 } 394 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 399 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 400 401 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 402 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 403 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 404 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 405 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 406 MatSeqAIJMKL_create_mkl_handle(A); 407 } 408 409 /* Call MKL SpMV2 executor routine to do the MatMult. */ 410 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 411 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 412 413 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 415 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 419 420 #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M) 421 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 422 { 423 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 424 const PetscScalar *x; 425 PetscScalar *y; 426 const MatScalar *aa; 427 PetscErrorCode ierr; 428 PetscInt m=A->rmap->n; 429 PetscInt n=A->cmap->n; 430 PetscScalar alpha = 1.0; 431 PetscScalar beta = 0.0; 432 const PetscInt *aj,*ai; 433 char matdescra[6]; 434 435 /* Variables not in MatMultTranspose_SeqAIJ. */ 436 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 437 438 PetscFunctionBegin; 439 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 440 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 441 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 442 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 443 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 444 aa = a->a; /* Nonzero elements stored row-by-row. */ 445 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 446 447 /* Call MKL sparse BLAS routine to do the MatMult. */ 448 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 449 450 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 451 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 452 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 #endif 456 457 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 458 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 459 { 460 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 461 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 462 const PetscScalar *x; 463 PetscScalar *y; 464 PetscErrorCode ierr; 465 sparse_status_t stat; 466 PetscObjectState state; 467 468 PetscFunctionBegin; 469 470 /* If there are no nonzero entries, zero yy and return immediately. */ 471 if(!a->nz) { 472 PetscInt i; 473 PetscInt n=A->cmap->n; 474 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 475 for (i=0; i<n; i++) { 476 y[i] = 0.0; 477 } 478 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 483 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 484 485 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 486 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 487 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 488 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 489 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 490 MatSeqAIJMKL_create_mkl_handle(A); 491 } 492 493 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 494 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 495 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 496 497 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 499 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 503 504 #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M) 505 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 506 { 507 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 508 const PetscScalar *x; 509 PetscScalar *y,*z; 510 const MatScalar *aa; 511 PetscErrorCode ierr; 512 PetscInt m=A->rmap->n; 513 PetscInt n=A->cmap->n; 514 const PetscInt *aj,*ai; 515 PetscInt i; 516 517 /* Variables not in MatMultAdd_SeqAIJ. */ 518 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 519 PetscScalar alpha = 1.0; 520 PetscScalar beta; 521 char matdescra[6]; 522 523 PetscFunctionBegin; 524 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 525 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 526 527 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 528 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 529 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 530 aa = a->a; /* Nonzero elements stored row-by-row. */ 531 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 532 533 /* Call MKL sparse BLAS routine to do the MatMult. */ 534 if (zz == yy) { 535 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 536 beta = 1.0; 537 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 538 } else { 539 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 540 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 541 beta = 0.0; 542 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 543 for (i=0; i<m; i++) { 544 z[i] += y[i]; 545 } 546 } 547 548 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 549 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 550 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 551 PetscFunctionReturn(0); 552 } 553 #endif 554 555 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 556 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 557 { 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 560 const PetscScalar *x; 561 PetscScalar *y,*z; 562 PetscErrorCode ierr; 563 PetscInt m=A->rmap->n; 564 PetscInt i; 565 566 /* Variables not in MatMultAdd_SeqAIJ. */ 567 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 568 PetscObjectState state; 569 570 PetscFunctionBegin; 571 572 /* If there are no nonzero entries, set zz = yy and return immediately. */ 573 if(!a->nz) { 574 PetscInt i; 575 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 576 for (i=0; i<m; i++) { 577 z[i] = y[i]; 578 } 579 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 584 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 585 586 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 587 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 588 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 589 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 590 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 591 MatSeqAIJMKL_create_mkl_handle(A); 592 } 593 594 /* Call MKL sparse BLAS routine to do the MatMult. */ 595 if (zz == yy) { 596 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 597 * with alpha and beta both set to 1.0. */ 598 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 599 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 600 } else { 601 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 602 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 603 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 604 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 605 for (i=0; i<m; i++) { 606 z[i] += y[i]; 607 } 608 } 609 610 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 611 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 612 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 616 617 #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M) 618 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 619 { 620 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 621 const PetscScalar *x; 622 PetscScalar *y,*z; 623 const MatScalar *aa; 624 PetscErrorCode ierr; 625 PetscInt m=A->rmap->n; 626 PetscInt n=A->cmap->n; 627 const PetscInt *aj,*ai; 628 PetscInt i; 629 630 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 631 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 632 PetscScalar alpha = 1.0; 633 PetscScalar beta; 634 char matdescra[6]; 635 636 PetscFunctionBegin; 637 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 638 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 639 640 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 641 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 642 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 643 aa = a->a; /* Nonzero elements stored row-by-row. */ 644 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 645 646 /* Call MKL sparse BLAS routine to do the MatMult. */ 647 if (zz == yy) { 648 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 649 beta = 1.0; 650 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 651 } else { 652 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 653 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 654 beta = 0.0; 655 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 656 for (i=0; i<n; i++) { 657 z[i] += y[i]; 658 } 659 } 660 661 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 #endif 667 668 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 669 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 670 { 671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 672 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 673 const PetscScalar *x; 674 PetscScalar *y,*z; 675 PetscErrorCode ierr; 676 PetscInt n=A->cmap->n; 677 PetscInt i; 678 PetscObjectState state; 679 680 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 681 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 682 683 PetscFunctionBegin; 684 685 /* If there are no nonzero entries, set zz = yy and return immediately. */ 686 if(!a->nz) { 687 PetscInt i; 688 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 689 for (i=0; i<n; i++) { 690 z[i] = y[i]; 691 } 692 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 697 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 698 699 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 700 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 701 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 702 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 703 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 704 MatSeqAIJMKL_create_mkl_handle(A); 705 } 706 707 /* Call MKL sparse BLAS routine to do the MatMult. */ 708 if (zz == yy) { 709 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 710 * with alpha and beta both set to 1.0. */ 711 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 712 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 713 } else { 714 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 715 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 716 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 717 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 718 for (i=0; i<n; i++) { 719 z[i] += y[i]; 720 } 721 } 722 723 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 724 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 725 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 729 730 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 731 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 732 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 733 * For releases of MKL prior to version 18, update 2: 734 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 735 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 736 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 737 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 738 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 739 { 740 Mat_SeqAIJMKL *a, *b; 741 sparse_matrix_t csrA, csrB, csrC; 742 PetscErrorCode ierr; 743 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 744 PetscObjectState state; 745 746 PetscFunctionBegin; 747 a = (Mat_SeqAIJMKL*)A->spptr; 748 b = (Mat_SeqAIJMKL*)B->spptr; 749 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 750 if (!a->sparse_optimized || a->state != state) { 751 MatSeqAIJMKL_create_mkl_handle(A); 752 } 753 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 754 if (!b->sparse_optimized || b->state != state) { 755 MatSeqAIJMKL_create_mkl_handle(B); 756 } 757 csrA = a->csrA; 758 csrB = b->csrA; 759 760 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 761 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 762 763 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 764 765 PetscFunctionReturn(0); 766 } 767 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 768 769 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M) 770 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 771 { 772 Mat_SeqAIJMKL *a, *b, *c; 773 sparse_matrix_t csrA, csrB, csrC; 774 PetscErrorCode ierr; 775 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 776 struct matrix_descr descr_type_gen; 777 PetscObjectState state; 778 779 PetscFunctionBegin; 780 a = (Mat_SeqAIJMKL*)A->spptr; 781 b = (Mat_SeqAIJMKL*)B->spptr; 782 c = (Mat_SeqAIJMKL*)C->spptr; 783 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 784 if (!a->sparse_optimized || a->state != state) { 785 MatSeqAIJMKL_create_mkl_handle(A); 786 } 787 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 788 if (!b->sparse_optimized || b->state != state) { 789 MatSeqAIJMKL_create_mkl_handle(B); 790 } 791 csrA = a->csrA; 792 csrB = b->csrA; 793 csrC = c->csrA; 794 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 795 796 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 797 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 798 SPARSE_STAGE_FINALIZE_MULT,&csrC); 799 800 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 801 802 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 803 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 804 805 PetscFunctionReturn(0); 806 } 807 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 808 809 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 810 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 811 { 812 Mat_SeqAIJMKL *a, *b; 813 sparse_matrix_t csrA, csrB, csrC; 814 PetscErrorCode ierr; 815 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 816 PetscObjectState state; 817 818 PetscFunctionBegin; 819 a = (Mat_SeqAIJMKL*)A->spptr; 820 b = (Mat_SeqAIJMKL*)B->spptr; 821 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 822 if (!a->sparse_optimized || a->state != state) { 823 MatSeqAIJMKL_create_mkl_handle(A); 824 } 825 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 826 if (!b->sparse_optimized || b->state != state) { 827 MatSeqAIJMKL_create_mkl_handle(B); 828 } 829 csrA = a->csrA; 830 csrB = b->csrA; 831 832 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 833 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 834 835 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 836 837 PetscFunctionReturn(0); 838 } 839 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 840 841 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M) 842 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 843 { 844 Mat_SeqAIJMKL *a, *p, *c; 845 sparse_matrix_t csrA, csrP, csrC; 846 PetscBool set, flag; 847 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 848 struct matrix_descr descr_type_sym; 849 PetscObjectState state; 850 PetscErrorCode ierr; 851 852 PetscFunctionBegin; 853 ierr = MatIsSymmetricKnown(A,&set,&flag); 854 if (!set || (set && !flag)) { 855 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 856 PetscFunctionReturn(0); 857 } 858 859 a = (Mat_SeqAIJMKL*)A->spptr; 860 p = (Mat_SeqAIJMKL*)P->spptr; 861 c = (Mat_SeqAIJMKL*)C->spptr; 862 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 863 if (!a->sparse_optimized || a->state != state) { 864 MatSeqAIJMKL_create_mkl_handle(A); 865 } 866 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 867 if (!p->sparse_optimized || p->state != state) { 868 MatSeqAIJMKL_create_mkl_handle(P); 869 } 870 csrA = a->csrA; 871 csrP = p->csrA; 872 csrC = c->csrA; 873 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 874 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 875 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 876 877 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 878 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 879 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 880 881 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 882 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 883 884 PetscFunctionReturn(0); 885 } 886 #endif 887 888 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M) 889 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 890 { 891 Mat_SeqAIJMKL *a, *p; 892 sparse_matrix_t csrA, csrP, csrC; 893 PetscBool set, flag; 894 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 895 struct matrix_descr descr_type_sym; 896 PetscObjectState state; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = MatIsSymmetricKnown(A,&set,&flag); 901 if (!set || (set && !flag)) { 902 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 if (scall == MAT_REUSE_MATRIX) { 907 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 a = (Mat_SeqAIJMKL*)A->spptr; 912 p = (Mat_SeqAIJMKL*)P->spptr; 913 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 914 if (!a->sparse_optimized || a->state != state) { 915 MatSeqAIJMKL_create_mkl_handle(A); 916 } 917 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 918 if (!p->sparse_optimized || p->state != state) { 919 MatSeqAIJMKL_create_mkl_handle(P); 920 } 921 csrA = a->csrA; 922 csrP = p->csrA; 923 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 924 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 925 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 926 927 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 928 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT); 929 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 930 931 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 932 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 933 934 PetscFunctionReturn(0); 935 } 936 #endif 937 938 /* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */ 939 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 940 941 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 942 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 943 * routine, but can also be used to convert an assembled SeqAIJ matrix 944 * into a SeqAIJMKL one. */ 945 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 946 { 947 PetscErrorCode ierr; 948 Mat B = *newmat; 949 Mat_SeqAIJMKL *aijmkl; 950 PetscBool set; 951 PetscBool sametype; 952 953 PetscFunctionBegin; 954 if (reuse == MAT_INITIAL_MATRIX) { 955 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 956 } 957 958 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 959 if (sametype) PetscFunctionReturn(0); 960 961 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 962 B->spptr = (void*) aijmkl; 963 964 /* Set function pointers for methods that we inherit from AIJ but override. 965 * We also parse some command line options below, since those determine some of the methods we point to. */ 966 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 967 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 968 B->ops->destroy = MatDestroy_SeqAIJMKL; 969 970 aijmkl->sparse_optimized = PETSC_FALSE; 971 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 972 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 973 #else 974 aijmkl->no_SpMV2 = PETSC_TRUE; 975 #endif 976 aijmkl->eager_inspection = PETSC_FALSE; 977 978 /* Parse command line options. */ 979 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 980 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 981 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 982 ierr = PetscOptionsEnd();CHKERRQ(ierr); 983 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 984 if(!aijmkl->no_SpMV2) { 985 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 986 aijmkl->no_SpMV2 = PETSC_TRUE; 987 } 988 #endif 989 990 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 991 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 992 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 993 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 994 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 995 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 996 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M) 997 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 998 # if !defined(PETSC_USE_COMPLEX) 999 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 1000 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1001 # endif 1002 # endif 1003 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1004 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1005 1006 #if !defined(PETSC_HAVE_MKL_SPARSE_SP2M) 1007 /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were 1008 * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new 1009 * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the 1010 * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old 1011 * interface. */ 1012 if (aijmkl->no_SpMV2) { 1013 B->ops->mult = MatMult_SeqAIJMKL; 1014 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1015 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1016 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1017 } 1018 #endif 1019 1020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 1025 if(!aijmkl->no_SpMV2) { 1026 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1028 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M) 1029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1030 #endif 1031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1032 #endif 1033 } 1034 1035 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1036 *newmat = B; 1037 PetscFunctionReturn(0); 1038 } 1039 1040 /*@C 1041 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1042 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1043 routines from Intel MKL whenever possible. 1044 If the installed version of MKL supports the "SpMV2" sparse 1045 inspector-executor routines, then those are used by default. 1046 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1047 symmetric A) operations are currently supported. 1048 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1049 1050 Collective on MPI_Comm 1051 1052 Input Parameters: 1053 + comm - MPI communicator, set to PETSC_COMM_SELF 1054 . m - number of rows 1055 . n - number of columns 1056 . nz - number of nonzeros per row (same for all rows) 1057 - nnz - array containing the number of nonzeros in the various rows 1058 (possibly different for each row) or NULL 1059 1060 Output Parameter: 1061 . A - the matrix 1062 1063 Options Database Keys: 1064 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1065 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1066 1067 Notes: 1068 If nnz is given then nz is ignored 1069 1070 Level: intermediate 1071 1072 .keywords: matrix, MKL, sparse, parallel 1073 1074 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1075 @*/ 1076 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1077 { 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1082 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1083 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1084 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1089 { 1090 PetscErrorCode ierr; 1091 1092 PetscFunctionBegin; 1093 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1094 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097