1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 B->ops->scale = MatScale_SeqAIJ; 57 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 58 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 59 B->ops->axpy = MatAXPY_SeqAIJ; 60 61 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 66 if(!aijmkl->no_SpMV2) { 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 69 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 70 #endif 71 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 72 } 73 74 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 75 * simply involves destroying the MKL sparse matrix handle and then freeing 76 * the spptr pointer. */ 77 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 78 79 if (aijmkl->sparse_optimized) { 80 sparse_status_t stat; 81 stat = mkl_sparse_destroy(aijmkl->csrA); 82 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 83 } 84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 86 87 /* Change the type of B to MATSEQAIJ. */ 88 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 89 90 *newmat = B; 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 95 { 96 PetscErrorCode ierr; 97 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 98 99 PetscFunctionBegin; 100 101 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 102 * spptr pointer. */ 103 if (aijmkl) { 104 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 105 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 106 if (aijmkl->sparse_optimized) { 107 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 108 stat = mkl_sparse_destroy(aijmkl->csrA); 109 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 110 } 111 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 112 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 113 } 114 115 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 116 * to destroy everything that remains. */ 117 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 118 /* Note that I don't call MatSetType(). I believe this is because that 119 * is only to be called when *building* a matrix. I could be wrong, but 120 * that is how things work for the SuperLU matrix class. */ 121 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 122 PetscFunctionReturn(0); 123 } 124 125 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 126 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 127 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 128 * handle, creates a new one, and then calls mkl_sparse_optimize(). 129 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 130 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 131 * an unoptimized matrix handle here. */ 132 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 133 { 134 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 135 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 136 * does nothing. We make it callable anyway in this case because it cuts 137 * down on littering the code with #ifdefs. */ 138 PetscFunctionBegin; 139 PetscFunctionReturn(0); 140 #else 141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 142 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 143 PetscInt m,n; 144 MatScalar *aa; 145 PetscInt *aj,*ai; 146 sparse_status_t stat; 147 PetscErrorCode ierr; 148 PetscObjectState state; 149 150 PetscFunctionBegin; 151 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 152 153 if (aijmkl->sparse_optimized) { 154 /* Matrix has been previously assembled and optimized. Must destroy old 155 * matrix handle before running the optimization step again. */ 156 stat = mkl_sparse_destroy(aijmkl->csrA); 157 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 158 } 159 aijmkl->sparse_optimized = PETSC_FALSE; 160 161 /* Now perform the SpMV2 setup and matrix optimization. */ 162 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 163 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 164 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 165 m = A->rmap->n; 166 n = A->cmap->n; 167 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 168 aa = a->a; /* Nonzero elements stored row-by-row. */ 169 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 170 if ((a->nz!=0) & !(A->structure_only)) { 171 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 172 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 173 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 174 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 175 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 176 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 177 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 179 stat = mkl_sparse_optimize(aijmkl->csrA); 180 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 181 aijmkl->sparse_optimized = PETSC_TRUE; 182 } 183 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 184 185 PetscFunctionReturn(0); 186 #endif 187 } 188 189 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 190 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 191 * matrix handle. 192 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 193 * there is no good alternative. */ 194 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 195 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 196 { 197 PetscErrorCode ierr; 198 sparse_status_t stat; 199 sparse_index_base_t indexing; 200 PetscInt nrows, ncols; 201 PetscInt *aj,*ai,*dummy; 202 MatScalar *aa; 203 Mat A; 204 Mat_SeqAIJMKL *aijmkl; 205 206 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 207 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 208 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 209 210 if (reuse == MAT_REUSE_MATRIX) { 211 ierr = MatDestroy(mat);CHKERRQ(ierr); 212 } 213 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 214 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 215 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 216 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 217 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 218 * they will be destroyed when the MKL handle is destroyed. 219 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 220 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 221 222 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 223 * Now turn it into a MATSEQAIJMKL. */ 224 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 225 226 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 227 aijmkl->csrA = csrA; 228 229 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 230 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 231 * and just need to be able to run the MKL optimization step. */ 232 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 233 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 234 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 235 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 236 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 237 stat = mkl_sparse_optimize(aijmkl->csrA); 238 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 239 aijmkl->sparse_optimized = PETSC_TRUE; 240 241 *mat = A; 242 PetscFunctionReturn(0); 243 } 244 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 245 246 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 247 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 248 * MatMatMultNumeric(). */ 249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 250 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 251 { 252 PetscInt i; 253 PetscInt nrows,ncols; 254 PetscInt nz; 255 PetscInt *ai,*aj,*dummy; 256 PetscScalar *aa; 257 PetscErrorCode ierr; 258 Mat_SeqAIJMKL *aijmkl; 259 sparse_status_t stat; 260 sparse_index_base_t indexing; 261 262 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 263 264 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 265 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 266 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 267 268 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 269 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 270 for (i=0; i<nrows; i++) { 271 nz = ai[i+1] - ai[i]; 272 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 273 } 274 275 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 277 278 PetscFunctionReturn(0); 279 } 280 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 281 282 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 283 { 284 PetscErrorCode ierr; 285 Mat_SeqAIJMKL *aijmkl; 286 Mat_SeqAIJMKL *aijmkl_dest; 287 288 PetscFunctionBegin; 289 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 290 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 291 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 292 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 293 aijmkl_dest->sparse_optimized = PETSC_FALSE; 294 if (aijmkl->eager_inspection) { 295 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 301 { 302 PetscErrorCode ierr; 303 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 304 Mat_SeqAIJMKL *aijmkl; 305 306 PetscFunctionBegin; 307 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 308 309 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 310 * extra information and some different methods, call the AssemblyEnd 311 * routine for a MATSEQAIJ. 312 * I'm not sure if this is the best way to do this, but it avoids 313 * a lot of code duplication. */ 314 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 315 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 316 317 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 318 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 319 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 320 if (aijmkl->eager_inspection) { 321 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 322 } 323 324 PetscFunctionReturn(0); 325 } 326 327 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 328 { 329 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 330 const PetscScalar *x; 331 PetscScalar *y; 332 const MatScalar *aa; 333 PetscErrorCode ierr; 334 PetscInt m=A->rmap->n; 335 PetscInt n=A->cmap->n; 336 PetscScalar alpha = 1.0; 337 PetscScalar beta = 0.0; 338 const PetscInt *aj,*ai; 339 char matdescra[6]; 340 341 342 /* Variables not in MatMult_SeqAIJ. */ 343 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 344 345 PetscFunctionBegin; 346 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 347 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 348 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 349 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 350 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 351 aa = a->a; /* Nonzero elements stored row-by-row. */ 352 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 353 354 /* Call MKL sparse BLAS routine to do the MatMult. */ 355 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 356 357 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 359 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 364 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 365 { 366 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 367 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 368 const PetscScalar *x; 369 PetscScalar *y; 370 PetscErrorCode ierr; 371 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 372 PetscObjectState state; 373 374 PetscFunctionBegin; 375 376 /* If there are no nonzero entries, zero yy and return immediately. */ 377 if(!a->nz) { 378 PetscInt i; 379 PetscInt m=A->rmap->n; 380 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 381 for (i=0; i<m; i++) { 382 y[i] = 0.0; 383 } 384 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 389 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 390 391 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 392 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 393 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 394 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 395 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 396 MatSeqAIJMKL_create_mkl_handle(A); 397 } 398 399 /* Call MKL SpMV2 executor routine to do the MatMult. */ 400 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 401 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 402 403 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 409 410 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 411 { 412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 413 const PetscScalar *x; 414 PetscScalar *y; 415 const MatScalar *aa; 416 PetscErrorCode ierr; 417 PetscInt m=A->rmap->n; 418 PetscInt n=A->cmap->n; 419 PetscScalar alpha = 1.0; 420 PetscScalar beta = 0.0; 421 const PetscInt *aj,*ai; 422 char matdescra[6]; 423 424 /* Variables not in MatMultTranspose_SeqAIJ. */ 425 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 426 427 PetscFunctionBegin; 428 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 429 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 430 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 431 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 432 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 433 aa = a->a; /* Nonzero elements stored row-by-row. */ 434 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 435 436 /* Call MKL sparse BLAS routine to do the MatMult. */ 437 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 438 439 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 441 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 442 PetscFunctionReturn(0); 443 } 444 445 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 446 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 447 { 448 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 449 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 450 const PetscScalar *x; 451 PetscScalar *y; 452 PetscErrorCode ierr; 453 sparse_status_t stat; 454 PetscObjectState state; 455 456 PetscFunctionBegin; 457 458 /* If there are no nonzero entries, zero yy and return immediately. */ 459 if(!a->nz) { 460 PetscInt i; 461 PetscInt n=A->cmap->n; 462 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 463 for (i=0; i<n; i++) { 464 y[i] = 0.0; 465 } 466 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 472 473 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 474 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 475 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 476 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 477 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 478 MatSeqAIJMKL_create_mkl_handle(A); 479 } 480 481 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 482 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 483 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 484 485 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 491 492 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 493 { 494 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 495 const PetscScalar *x; 496 PetscScalar *y,*z; 497 const MatScalar *aa; 498 PetscErrorCode ierr; 499 PetscInt m=A->rmap->n; 500 PetscInt n=A->cmap->n; 501 const PetscInt *aj,*ai; 502 PetscInt i; 503 504 /* Variables not in MatMultAdd_SeqAIJ. */ 505 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 506 PetscScalar alpha = 1.0; 507 PetscScalar beta; 508 char matdescra[6]; 509 510 PetscFunctionBegin; 511 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 512 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 513 514 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 515 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 516 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 517 aa = a->a; /* Nonzero elements stored row-by-row. */ 518 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 519 520 /* Call MKL sparse BLAS routine to do the MatMult. */ 521 if (zz == yy) { 522 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 523 beta = 1.0; 524 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 525 } else { 526 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 527 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 528 beta = 0.0; 529 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 530 for (i=0; i<m; i++) { 531 z[i] += y[i]; 532 } 533 } 534 535 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 536 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 537 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 542 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 543 { 544 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 545 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 546 const PetscScalar *x; 547 PetscScalar *y,*z; 548 PetscErrorCode ierr; 549 PetscInt m=A->rmap->n; 550 PetscInt i; 551 552 /* Variables not in MatMultAdd_SeqAIJ. */ 553 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 554 PetscObjectState state; 555 556 PetscFunctionBegin; 557 558 /* If there are no nonzero entries, set zz = yy and return immediately. */ 559 if(!a->nz) { 560 PetscInt i; 561 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 562 for (i=0; i<m; i++) { 563 z[i] = y[i]; 564 } 565 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 570 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 571 572 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 573 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 574 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 575 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 576 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 577 MatSeqAIJMKL_create_mkl_handle(A); 578 } 579 580 /* Call MKL sparse BLAS routine to do the MatMult. */ 581 if (zz == yy) { 582 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 583 * with alpha and beta both set to 1.0. */ 584 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 585 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 586 } else { 587 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 588 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 589 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 590 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 591 for (i=0; i<m; i++) { 592 z[i] += y[i]; 593 } 594 } 595 596 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 597 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 598 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 602 603 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 604 { 605 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 606 const PetscScalar *x; 607 PetscScalar *y,*z; 608 const MatScalar *aa; 609 PetscErrorCode ierr; 610 PetscInt m=A->rmap->n; 611 PetscInt n=A->cmap->n; 612 const PetscInt *aj,*ai; 613 PetscInt i; 614 615 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 616 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 617 PetscScalar alpha = 1.0; 618 PetscScalar beta; 619 char matdescra[6]; 620 621 PetscFunctionBegin; 622 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 623 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 624 625 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 626 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 627 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 628 aa = a->a; /* Nonzero elements stored row-by-row. */ 629 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 630 631 /* Call MKL sparse BLAS routine to do the MatMult. */ 632 if (zz == yy) { 633 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 634 beta = 1.0; 635 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 636 } else { 637 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 638 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 639 beta = 0.0; 640 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 641 for (i=0; i<n; i++) { 642 z[i] += y[i]; 643 } 644 } 645 646 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 647 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 648 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 652 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 653 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 656 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 657 const PetscScalar *x; 658 PetscScalar *y,*z; 659 PetscErrorCode ierr; 660 PetscInt n=A->cmap->n; 661 PetscInt i; 662 PetscObjectState state; 663 664 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 665 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 666 667 PetscFunctionBegin; 668 669 /* If there are no nonzero entries, set zz = yy and return immediately. */ 670 if(!a->nz) { 671 PetscInt i; 672 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 673 for (i=0; i<n; i++) { 674 z[i] = y[i]; 675 } 676 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 681 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 682 683 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 684 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 685 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 686 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 687 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 688 MatSeqAIJMKL_create_mkl_handle(A); 689 } 690 691 /* Call MKL sparse BLAS routine to do the MatMult. */ 692 if (zz == yy) { 693 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 694 * with alpha and beta both set to 1.0. */ 695 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 696 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 697 } else { 698 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 699 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 700 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 701 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 702 for (i=0; i<n; i++) { 703 z[i] += y[i]; 704 } 705 } 706 707 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 708 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 709 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 713 714 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 715 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 716 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 717 * For releases of MKL prior to version 18, update 2: 718 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 719 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 720 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 721 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 722 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 723 { 724 Mat_SeqAIJMKL *a, *b; 725 sparse_matrix_t csrA, csrB, csrC; 726 PetscErrorCode ierr; 727 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 728 PetscObjectState state; 729 730 PetscFunctionBegin; 731 a = (Mat_SeqAIJMKL*)A->spptr; 732 b = (Mat_SeqAIJMKL*)B->spptr; 733 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 734 if (!a->sparse_optimized || a->state != state) { 735 MatSeqAIJMKL_create_mkl_handle(A); 736 } 737 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 738 if (!b->sparse_optimized || b->state != state) { 739 MatSeqAIJMKL_create_mkl_handle(B); 740 } 741 csrA = a->csrA; 742 csrB = b->csrA; 743 744 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 745 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 746 747 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 748 749 PetscFunctionReturn(0); 750 } 751 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 752 753 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 754 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 755 { 756 Mat_SeqAIJMKL *a, *b, *c; 757 sparse_matrix_t csrA, csrB, csrC; 758 PetscErrorCode ierr; 759 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 760 struct matrix_descr descr_type_gen; 761 PetscObjectState state; 762 763 PetscFunctionBegin; 764 a = (Mat_SeqAIJMKL*)A->spptr; 765 b = (Mat_SeqAIJMKL*)B->spptr; 766 c = (Mat_SeqAIJMKL*)C->spptr; 767 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 768 if (!a->sparse_optimized || a->state != state) { 769 MatSeqAIJMKL_create_mkl_handle(A); 770 } 771 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 772 if (!b->sparse_optimized || b->state != state) { 773 MatSeqAIJMKL_create_mkl_handle(B); 774 } 775 csrA = a->csrA; 776 csrB = b->csrA; 777 csrC = c->csrA; 778 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 779 780 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 781 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 782 SPARSE_STAGE_FINALIZE_MULT,&csrC); 783 784 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 785 786 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 787 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 788 789 PetscFunctionReturn(0); 790 } 791 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 792 793 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 794 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 795 { 796 Mat_SeqAIJMKL *a, *b; 797 sparse_matrix_t csrA, csrB, csrC; 798 PetscErrorCode ierr; 799 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 800 PetscObjectState state; 801 802 PetscFunctionBegin; 803 a = (Mat_SeqAIJMKL*)A->spptr; 804 b = (Mat_SeqAIJMKL*)B->spptr; 805 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 806 if (!a->sparse_optimized || a->state != state) { 807 MatSeqAIJMKL_create_mkl_handle(A); 808 } 809 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 810 if (!b->sparse_optimized || b->state != state) { 811 MatSeqAIJMKL_create_mkl_handle(B); 812 } 813 csrA = a->csrA; 814 csrB = b->csrA; 815 816 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 817 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 818 819 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 820 821 PetscFunctionReturn(0); 822 } 823 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 824 825 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 826 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 827 { 828 Mat_SeqAIJMKL *a, *p, *c; 829 sparse_matrix_t csrA, csrP, csrC; 830 PetscBool set, flag; 831 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 832 struct matrix_descr descr_type_gen; 833 PetscObjectState state; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 ierr = MatIsSymmetricKnown(A,&set,&flag); 838 if (!set || (set && !flag)) { 839 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 843 a = (Mat_SeqAIJMKL*)A->spptr; 844 p = (Mat_SeqAIJMKL*)P->spptr; 845 c = (Mat_SeqAIJMKL*)C->spptr; 846 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 847 if (!a->sparse_optimized || a->state != state) { 848 MatSeqAIJMKL_create_mkl_handle(A); 849 } 850 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 851 if (!p->sparse_optimized || p->state != state) { 852 MatSeqAIJMKL_create_mkl_handle(P); 853 } 854 csrA = a->csrA; 855 csrP = p->csrA; 856 csrC = c->csrA; 857 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 858 859 /* TODO: Below won't work for complex matrix. Protect this! Maybe where function pointers are assigned in MatConvert? */ 860 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT); 861 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 862 863 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 864 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 865 866 PetscFunctionReturn(0); 867 } 868 #endif 869 870 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 871 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 872 { 873 Mat_SeqAIJMKL *a, *p; 874 sparse_matrix_t csrA, csrP, csrC; 875 PetscBool set, flag; 876 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 877 struct matrix_descr descr_type_gen; 878 PetscObjectState state; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 ierr = MatIsSymmetricKnown(A,&set,&flag); 883 if (!set || (set && !flag)) { 884 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 888 if (scall == MAT_REUSE_MATRIX) { 889 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 a = (Mat_SeqAIJMKL*)A->spptr; 894 p = (Mat_SeqAIJMKL*)P->spptr; 895 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 896 if (!a->sparse_optimized || a->state != state) { 897 MatSeqAIJMKL_create_mkl_handle(A); 898 } 899 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 900 if (!p->sparse_optimized || p->state != state) { 901 MatSeqAIJMKL_create_mkl_handle(P); 902 } 903 csrA = a->csrA; 904 csrP = p->csrA; 905 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 906 907 /* TODO: Below won't work for complex matrix. Protect this! Maybe where function pointers are assigned in MatConvert? */ 908 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT); 909 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 910 911 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 912 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 913 914 PetscFunctionReturn(0); 915 } 916 #endif 917 918 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 919 { 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 924 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 929 { 930 PetscErrorCode ierr; 931 932 PetscFunctionBegin; 933 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 934 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 935 PetscFunctionReturn(0); 936 } 937 938 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 939 { 940 PetscErrorCode ierr; 941 942 PetscFunctionBegin; 943 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 944 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 949 { 950 PetscErrorCode ierr; 951 952 PetscFunctionBegin; 953 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 954 if (str == SAME_NONZERO_PATTERN) { 955 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 956 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 957 } 958 PetscFunctionReturn(0); 959 } 960 961 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 962 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 963 * routine, but can also be used to convert an assembled SeqAIJ matrix 964 * into a SeqAIJMKL one. */ 965 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 966 { 967 PetscErrorCode ierr; 968 Mat B = *newmat; 969 Mat_SeqAIJMKL *aijmkl; 970 PetscBool set; 971 PetscBool sametype; 972 973 PetscFunctionBegin; 974 if (reuse == MAT_INITIAL_MATRIX) { 975 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 976 } 977 978 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 979 if (sametype) PetscFunctionReturn(0); 980 981 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 982 B->spptr = (void*) aijmkl; 983 984 /* Set function pointers for methods that we inherit from AIJ but override. 985 * We also parse some command line options below, since those determine some of the methods we point to. */ 986 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 987 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 988 B->ops->destroy = MatDestroy_SeqAIJMKL; 989 990 aijmkl->sparse_optimized = PETSC_FALSE; 991 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 992 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 993 #else 994 aijmkl->no_SpMV2 = PETSC_TRUE; 995 #endif 996 aijmkl->eager_inspection = PETSC_FALSE; 997 998 /* Parse command line options. */ 999 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1000 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1001 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1002 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1003 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1004 if(!aijmkl->no_SpMV2) { 1005 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1006 aijmkl->no_SpMV2 = PETSC_TRUE; 1007 } 1008 #endif 1009 1010 if(!aijmkl->no_SpMV2) { 1011 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1012 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1013 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1014 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1015 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1016 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1017 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1018 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1019 #ifndef PETSC_USE_COMPLEX 1020 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 1021 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1022 #endif 1023 #endif 1024 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1025 #endif 1026 } else { 1027 B->ops->mult = MatMult_SeqAIJMKL; 1028 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1029 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1030 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1031 } 1032 1033 B->ops->scale = MatScale_SeqAIJMKL; 1034 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 1035 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 1036 B->ops->axpy = MatAXPY_SeqAIJMKL; 1037 1038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 1039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1043 if(!aijmkl->no_SpMV2) { 1044 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1046 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1048 #endif 1049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1050 #endif 1051 } 1052 1053 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1054 *newmat = B; 1055 PetscFunctionReturn(0); 1056 } 1057 1058 /*@C 1059 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1060 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1061 routines from Intel MKL whenever possible. 1062 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 1063 operations are currently supported. 1064 If the installed version of MKL supports the "SpMV2" sparse 1065 inspector-executor routines, then those are used by default. 1066 1067 Collective on MPI_Comm 1068 1069 Input Parameters: 1070 + comm - MPI communicator, set to PETSC_COMM_SELF 1071 . m - number of rows 1072 . n - number of columns 1073 . nz - number of nonzeros per row (same for all rows) 1074 - nnz - array containing the number of nonzeros in the various rows 1075 (possibly different for each row) or NULL 1076 1077 Output Parameter: 1078 . A - the matrix 1079 1080 Options Database Keys: 1081 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1082 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1083 1084 Notes: 1085 If nnz is given then nz is ignored 1086 1087 Level: intermediate 1088 1089 .keywords: matrix, MKL, sparse, parallel 1090 1091 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1092 @*/ 1093 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1094 { 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1099 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1100 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1101 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1106 { 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1111 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114