1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 B->ops->scale = MatScale_SeqAIJ; 57 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 58 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 59 B->ops->axpy = MatAXPY_SeqAIJ; 60 61 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 65 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 66 if(!aijmkl->no_SpMV2) { 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 69 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 70 #endif 71 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 72 } 73 74 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 75 * simply involves destroying the MKL sparse matrix handle and then freeing 76 * the spptr pointer. */ 77 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 78 79 if (aijmkl->sparse_optimized) { 80 sparse_status_t stat; 81 stat = mkl_sparse_destroy(aijmkl->csrA); 82 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 83 } 84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 86 87 /* Change the type of B to MATSEQAIJ. */ 88 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 89 90 *newmat = B; 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 95 { 96 PetscErrorCode ierr; 97 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 98 99 PetscFunctionBegin; 100 101 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 102 * spptr pointer. */ 103 if (aijmkl) { 104 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 105 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 106 if (aijmkl->sparse_optimized) { 107 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 108 stat = mkl_sparse_destroy(aijmkl->csrA); 109 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 110 } 111 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 112 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 113 } 114 115 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 116 * to destroy everything that remains. */ 117 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 118 /* Note that I don't call MatSetType(). I believe this is because that 119 * is only to be called when *building* a matrix. I could be wrong, but 120 * that is how things work for the SuperLU matrix class. */ 121 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 122 PetscFunctionReturn(0); 123 } 124 125 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 126 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 127 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 128 * handle, creates a new one, and then calls mkl_sparse_optimize(). 129 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 130 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 131 * an unoptimized matrix handle here. */ 132 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 133 { 134 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 135 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 136 * does nothing. We make it callable anyway in this case because it cuts 137 * down on littering the code with #ifdefs. */ 138 PetscFunctionBegin; 139 PetscFunctionReturn(0); 140 #else 141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 142 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 143 PetscInt m,n; 144 MatScalar *aa; 145 PetscInt *aj,*ai; 146 sparse_status_t stat; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 151 152 if (aijmkl->sparse_optimized) { 153 /* Matrix has been previously assembled and optimized. Must destroy old 154 * matrix handle before running the optimization step again. */ 155 stat = mkl_sparse_destroy(aijmkl->csrA); 156 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 157 } 158 aijmkl->sparse_optimized = PETSC_FALSE; 159 160 /* Now perform the SpMV2 setup and matrix optimization. */ 161 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 162 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 163 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 164 m = A->rmap->n; 165 n = A->cmap->n; 166 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 167 aa = a->a; /* Nonzero elements stored row-by-row. */ 168 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 169 if ((a->nz!=0) & !(A->structure_only)) { 170 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 171 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 172 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 174 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 176 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 177 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 178 stat = mkl_sparse_optimize(aijmkl->csrA); 179 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 182 } 183 184 PetscFunctionReturn(0); 185 #endif 186 } 187 188 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 189 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 190 * matrix handle. 191 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 192 * there is no good alternative. */ 193 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 194 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 195 { 196 PetscErrorCode ierr; 197 sparse_status_t stat; 198 sparse_index_base_t indexing; 199 PetscInt nrows, ncols; 200 PetscInt *aj,*ai,*dummy; 201 MatScalar *aa; 202 Mat A; 203 Mat_SeqAIJMKL *aijmkl; 204 205 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 206 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 207 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 208 209 if (reuse == MAT_REUSE_MATRIX) { 210 ierr = MatDestroy(mat);CHKERRQ(ierr); 211 } 212 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 213 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 214 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 215 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 216 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 217 * they will be destroyed when the MKL handle is destroyed. 218 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 219 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 220 221 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 222 * Now turn it into a MATSEQAIJMKL. */ 223 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 224 225 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 226 aijmkl->csrA = csrA; 227 228 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 229 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 230 * and just need to be able to run the MKL optimization step. */ 231 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 232 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 233 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 234 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 235 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 236 stat = mkl_sparse_optimize(aijmkl->csrA); 237 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 238 aijmkl->sparse_optimized = PETSC_TRUE; 239 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 240 241 *mat = A; 242 PetscFunctionReturn(0); 243 } 244 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 245 246 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 247 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 248 * MatMatMultNumeric(). */ 249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 250 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 251 { 252 PetscInt i; 253 PetscInt nrows,ncols; 254 PetscInt nz; 255 PetscInt *ai,*aj,*dummy; 256 PetscScalar *aa; 257 PetscErrorCode ierr; 258 Mat_SeqAIJMKL *aijmkl; 259 sparse_status_t stat; 260 sparse_index_base_t indexing; 261 262 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 263 264 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 265 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 266 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 267 268 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 269 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 270 for (i=0; i<nrows; i++) { 271 nz = ai[i+1] - ai[i]; 272 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 273 } 274 275 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 277 278 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 279 /* We mark our matrix as having a valid, optimized MKL handle. 280 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 281 aijmkl->sparse_optimized = PETSC_TRUE; 282 283 PetscFunctionReturn(0); 284 } 285 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 286 287 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 288 { 289 PetscErrorCode ierr; 290 Mat_SeqAIJMKL *aijmkl; 291 Mat_SeqAIJMKL *aijmkl_dest; 292 293 PetscFunctionBegin; 294 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 295 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 296 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 297 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 298 aijmkl_dest->sparse_optimized = PETSC_FALSE; 299 if (aijmkl->eager_inspection) { 300 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 301 } 302 PetscFunctionReturn(0); 303 } 304 305 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 306 { 307 PetscErrorCode ierr; 308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 309 Mat_SeqAIJMKL *aijmkl; 310 311 PetscFunctionBegin; 312 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 313 314 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 315 * extra information and some different methods, call the AssemblyEnd 316 * routine for a MATSEQAIJ. 317 * I'm not sure if this is the best way to do this, but it avoids 318 * a lot of code duplication. */ 319 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 320 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 321 322 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 323 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 324 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 325 if (aijmkl->eager_inspection) { 326 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 327 } 328 329 PetscFunctionReturn(0); 330 } 331 332 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 333 { 334 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 335 const PetscScalar *x; 336 PetscScalar *y; 337 const MatScalar *aa; 338 PetscErrorCode ierr; 339 PetscInt m=A->rmap->n; 340 PetscInt n=A->cmap->n; 341 PetscScalar alpha = 1.0; 342 PetscScalar beta = 0.0; 343 const PetscInt *aj,*ai; 344 char matdescra[6]; 345 346 347 /* Variables not in MatMult_SeqAIJ. */ 348 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 349 350 PetscFunctionBegin; 351 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 352 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 353 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 355 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 356 aa = a->a; /* Nonzero elements stored row-by-row. */ 357 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 358 359 /* Call MKL sparse BLAS routine to do the MatMult. */ 360 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 361 362 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 363 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 364 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 368 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 369 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 370 { 371 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 372 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 373 const PetscScalar *x; 374 PetscScalar *y; 375 PetscErrorCode ierr; 376 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 377 PetscObjectState state; 378 379 PetscFunctionBegin; 380 381 /* If there are no nonzero entries, zero yy and return immediately. */ 382 if(!a->nz) { 383 PetscInt i; 384 PetscInt m=A->rmap->n; 385 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 386 for (i=0; i<m; i++) { 387 y[i] = 0.0; 388 } 389 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 394 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 395 396 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 397 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 398 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 399 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 400 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 401 MatSeqAIJMKL_create_mkl_handle(A); 402 } 403 404 /* Call MKL SpMV2 executor routine to do the MatMult. */ 405 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 406 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 407 408 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 410 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 411 PetscFunctionReturn(0); 412 } 413 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 414 415 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 416 { 417 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 418 const PetscScalar *x; 419 PetscScalar *y; 420 const MatScalar *aa; 421 PetscErrorCode ierr; 422 PetscInt m=A->rmap->n; 423 PetscInt n=A->cmap->n; 424 PetscScalar alpha = 1.0; 425 PetscScalar beta = 0.0; 426 const PetscInt *aj,*ai; 427 char matdescra[6]; 428 429 /* Variables not in MatMultTranspose_SeqAIJ. */ 430 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 431 432 PetscFunctionBegin; 433 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 434 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 435 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 436 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 437 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 438 aa = a->a; /* Nonzero elements stored row-by-row. */ 439 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 440 441 /* Call MKL sparse BLAS routine to do the MatMult. */ 442 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 443 444 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 445 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 446 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 451 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 452 { 453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 454 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 455 const PetscScalar *x; 456 PetscScalar *y; 457 PetscErrorCode ierr; 458 sparse_status_t stat; 459 PetscObjectState state; 460 461 PetscFunctionBegin; 462 463 /* If there are no nonzero entries, zero yy and return immediately. */ 464 if(!a->nz) { 465 PetscInt i; 466 PetscInt n=A->cmap->n; 467 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 468 for (i=0; i<n; i++) { 469 y[i] = 0.0; 470 } 471 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 476 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 477 478 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 479 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 480 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 481 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 482 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 483 MatSeqAIJMKL_create_mkl_handle(A); 484 } 485 486 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 487 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 488 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 489 490 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 492 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 496 497 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 498 { 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 500 const PetscScalar *x; 501 PetscScalar *y,*z; 502 const MatScalar *aa; 503 PetscErrorCode ierr; 504 PetscInt m=A->rmap->n; 505 PetscInt n=A->cmap->n; 506 const PetscInt *aj,*ai; 507 PetscInt i; 508 509 /* Variables not in MatMultAdd_SeqAIJ. */ 510 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 511 PetscScalar alpha = 1.0; 512 PetscScalar beta; 513 char matdescra[6]; 514 515 PetscFunctionBegin; 516 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 517 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 518 519 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 520 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 521 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 522 aa = a->a; /* Nonzero elements stored row-by-row. */ 523 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 524 525 /* Call MKL sparse BLAS routine to do the MatMult. */ 526 if (zz == yy) { 527 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 528 beta = 1.0; 529 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 530 } else { 531 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 532 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 533 beta = 0.0; 534 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 535 for (i=0; i<m; i++) { 536 z[i] += y[i]; 537 } 538 } 539 540 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 541 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 542 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 547 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 548 { 549 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 550 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 551 const PetscScalar *x; 552 PetscScalar *y,*z; 553 PetscErrorCode ierr; 554 PetscInt m=A->rmap->n; 555 PetscInt i; 556 557 /* Variables not in MatMultAdd_SeqAIJ. */ 558 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 559 PetscObjectState state; 560 561 PetscFunctionBegin; 562 563 /* If there are no nonzero entries, set zz = yy and return immediately. */ 564 if(!a->nz) { 565 PetscInt i; 566 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 567 for (i=0; i<m; i++) { 568 z[i] = y[i]; 569 } 570 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 575 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 576 577 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 578 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 579 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 580 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 581 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 582 MatSeqAIJMKL_create_mkl_handle(A); 583 } 584 585 /* Call MKL sparse BLAS routine to do the MatMult. */ 586 if (zz == yy) { 587 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 588 * with alpha and beta both set to 1.0. */ 589 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 590 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 591 } else { 592 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 593 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 594 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 595 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 596 for (i=0; i<m; i++) { 597 z[i] += y[i]; 598 } 599 } 600 601 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 603 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 607 608 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 609 { 610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 611 const PetscScalar *x; 612 PetscScalar *y,*z; 613 const MatScalar *aa; 614 PetscErrorCode ierr; 615 PetscInt m=A->rmap->n; 616 PetscInt n=A->cmap->n; 617 const PetscInt *aj,*ai; 618 PetscInt i; 619 620 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 621 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 622 PetscScalar alpha = 1.0; 623 PetscScalar beta; 624 char matdescra[6]; 625 626 PetscFunctionBegin; 627 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 628 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 629 630 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 631 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 632 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 633 aa = a->a; /* Nonzero elements stored row-by-row. */ 634 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 635 636 /* Call MKL sparse BLAS routine to do the MatMult. */ 637 if (zz == yy) { 638 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 639 beta = 1.0; 640 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 641 } else { 642 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 643 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 644 beta = 0.0; 645 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 646 for (i=0; i<n; i++) { 647 z[i] += y[i]; 648 } 649 } 650 651 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 652 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 653 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 658 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 662 const PetscScalar *x; 663 PetscScalar *y,*z; 664 PetscErrorCode ierr; 665 PetscInt n=A->cmap->n; 666 PetscInt i; 667 PetscObjectState state; 668 669 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 670 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 671 672 PetscFunctionBegin; 673 674 /* If there are no nonzero entries, set zz = yy and return immediately. */ 675 if(!a->nz) { 676 PetscInt i; 677 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 678 for (i=0; i<n; i++) { 679 z[i] = y[i]; 680 } 681 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 686 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 687 688 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 689 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 690 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 691 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 692 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 693 MatSeqAIJMKL_create_mkl_handle(A); 694 } 695 696 /* Call MKL sparse BLAS routine to do the MatMult. */ 697 if (zz == yy) { 698 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 699 * with alpha and beta both set to 1.0. */ 700 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 701 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 702 } else { 703 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 704 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 705 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 706 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 707 for (i=0; i<n; i++) { 708 z[i] += y[i]; 709 } 710 } 711 712 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 713 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 714 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 718 719 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 720 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 721 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 722 * For releases of MKL prior to version 18, update 2: 723 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 724 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 725 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 726 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 727 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 728 { 729 Mat_SeqAIJMKL *a, *b; 730 sparse_matrix_t csrA, csrB, csrC; 731 PetscErrorCode ierr; 732 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 733 PetscObjectState state; 734 735 PetscFunctionBegin; 736 a = (Mat_SeqAIJMKL*)A->spptr; 737 b = (Mat_SeqAIJMKL*)B->spptr; 738 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 739 if (!a->sparse_optimized || a->state != state) { 740 MatSeqAIJMKL_create_mkl_handle(A); 741 } 742 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 743 if (!b->sparse_optimized || b->state != state) { 744 MatSeqAIJMKL_create_mkl_handle(B); 745 } 746 csrA = a->csrA; 747 csrB = b->csrA; 748 749 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 750 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 751 752 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 753 754 PetscFunctionReturn(0); 755 } 756 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 757 758 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 759 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 760 { 761 Mat_SeqAIJMKL *a, *b, *c; 762 sparse_matrix_t csrA, csrB, csrC; 763 PetscErrorCode ierr; 764 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 765 struct matrix_descr descr_type_gen; 766 PetscObjectState state; 767 768 PetscFunctionBegin; 769 a = (Mat_SeqAIJMKL*)A->spptr; 770 b = (Mat_SeqAIJMKL*)B->spptr; 771 c = (Mat_SeqAIJMKL*)C->spptr; 772 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 773 if (!a->sparse_optimized || a->state != state) { 774 MatSeqAIJMKL_create_mkl_handle(A); 775 } 776 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 777 if (!b->sparse_optimized || b->state != state) { 778 MatSeqAIJMKL_create_mkl_handle(B); 779 } 780 csrA = a->csrA; 781 csrB = b->csrA; 782 csrC = c->csrA; 783 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 784 785 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 786 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 787 SPARSE_STAGE_FINALIZE_MULT,&csrC); 788 789 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 790 791 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 792 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 793 794 PetscFunctionReturn(0); 795 } 796 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 797 798 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 799 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 800 { 801 Mat_SeqAIJMKL *a, *b; 802 sparse_matrix_t csrA, csrB, csrC; 803 PetscErrorCode ierr; 804 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 805 PetscObjectState state; 806 807 PetscFunctionBegin; 808 a = (Mat_SeqAIJMKL*)A->spptr; 809 b = (Mat_SeqAIJMKL*)B->spptr; 810 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 811 if (!a->sparse_optimized || a->state != state) { 812 MatSeqAIJMKL_create_mkl_handle(A); 813 } 814 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 815 if (!b->sparse_optimized || b->state != state) { 816 MatSeqAIJMKL_create_mkl_handle(B); 817 } 818 csrA = a->csrA; 819 csrB = b->csrA; 820 821 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 822 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 823 824 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 825 826 PetscFunctionReturn(0); 827 } 828 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 829 830 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 831 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 832 { 833 Mat_SeqAIJMKL *a, *p, *c; 834 sparse_matrix_t csrA, csrP, csrC; 835 PetscBool set, flag; 836 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 837 struct matrix_descr descr_type_gen; 838 PetscObjectState state; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 ierr = MatIsSymmetricKnown(A,&set,&flag); 843 if (!set || (set && !flag)) { 844 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 a = (Mat_SeqAIJMKL*)A->spptr; 849 p = (Mat_SeqAIJMKL*)P->spptr; 850 c = (Mat_SeqAIJMKL*)C->spptr; 851 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 852 if (!a->sparse_optimized || a->state != state) { 853 MatSeqAIJMKL_create_mkl_handle(A); 854 } 855 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 856 if (!p->sparse_optimized || p->state != state) { 857 MatSeqAIJMKL_create_mkl_handle(P); 858 } 859 csrA = a->csrA; 860 csrP = p->csrA; 861 csrC = c->csrA; 862 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 863 864 /* TODO: Below won't work for complex matrix. Protect this! Maybe where function pointers are assigned in MatConvert? */ 865 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT); 866 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 867 868 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 869 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 870 871 PetscFunctionReturn(0); 872 } 873 #endif 874 875 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 876 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 877 { 878 Mat_SeqAIJMKL *a, *p; 879 sparse_matrix_t csrA, csrP, csrC; 880 PetscBool set, flag; 881 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 882 struct matrix_descr descr_type_gen; 883 PetscObjectState state; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 ierr = MatIsSymmetricKnown(A,&set,&flag); 888 if (!set || (set && !flag)) { 889 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 if (scall == MAT_REUSE_MATRIX) { 894 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 a = (Mat_SeqAIJMKL*)A->spptr; 899 p = (Mat_SeqAIJMKL*)P->spptr; 900 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 901 if (!a->sparse_optimized || a->state != state) { 902 MatSeqAIJMKL_create_mkl_handle(A); 903 } 904 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 905 if (!p->sparse_optimized || p->state != state) { 906 MatSeqAIJMKL_create_mkl_handle(P); 907 } 908 csrA = a->csrA; 909 csrP = p->csrA; 910 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 911 912 /* TODO: Below won't work for complex matrix. Protect this! Maybe where function pointers are assigned in MatConvert? */ 913 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT); 914 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 915 916 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 917 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 918 919 PetscFunctionReturn(0); 920 } 921 #endif 922 923 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 924 { 925 PetscErrorCode ierr; 926 927 PetscFunctionBegin; 928 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 929 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 934 { 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 939 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 944 { 945 PetscErrorCode ierr; 946 947 PetscFunctionBegin; 948 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 949 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 954 { 955 PetscErrorCode ierr; 956 957 PetscFunctionBegin; 958 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 959 if (str == SAME_NONZERO_PATTERN) { 960 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 961 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 967 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 968 * routine, but can also be used to convert an assembled SeqAIJ matrix 969 * into a SeqAIJMKL one. */ 970 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 971 { 972 PetscErrorCode ierr; 973 Mat B = *newmat; 974 Mat_SeqAIJMKL *aijmkl; 975 PetscBool set; 976 PetscBool sametype; 977 978 PetscFunctionBegin; 979 if (reuse == MAT_INITIAL_MATRIX) { 980 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 981 } 982 983 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 984 if (sametype) PetscFunctionReturn(0); 985 986 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 987 B->spptr = (void*) aijmkl; 988 989 /* Set function pointers for methods that we inherit from AIJ but override. 990 * We also parse some command line options below, since those determine some of the methods we point to. */ 991 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 992 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 993 B->ops->destroy = MatDestroy_SeqAIJMKL; 994 995 aijmkl->sparse_optimized = PETSC_FALSE; 996 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 997 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 998 #else 999 aijmkl->no_SpMV2 = PETSC_TRUE; 1000 #endif 1001 aijmkl->eager_inspection = PETSC_FALSE; 1002 1003 /* Parse command line options. */ 1004 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1005 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1006 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1007 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1008 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1009 if(!aijmkl->no_SpMV2) { 1010 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1011 aijmkl->no_SpMV2 = PETSC_TRUE; 1012 } 1013 #endif 1014 1015 if(!aijmkl->no_SpMV2) { 1016 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1017 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1018 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1019 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1020 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1021 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1022 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1023 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1024 #ifndef PETSC_USE_COMPLEX 1025 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 1026 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1027 #endif 1028 #endif 1029 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1030 #endif 1031 } else { 1032 B->ops->mult = MatMult_SeqAIJMKL; 1033 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1034 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1035 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1036 } 1037 1038 B->ops->scale = MatScale_SeqAIJMKL; 1039 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 1040 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 1041 B->ops->axpy = MatAXPY_SeqAIJMKL; 1042 1043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 1044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1048 if(!aijmkl->no_SpMV2) { 1049 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1051 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1053 #endif 1054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1055 #endif 1056 } 1057 1058 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1059 *newmat = B; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 /*@C 1064 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1065 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1066 routines from Intel MKL whenever possible. 1067 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 1068 operations are currently supported. 1069 If the installed version of MKL supports the "SpMV2" sparse 1070 inspector-executor routines, then those are used by default. 1071 1072 Collective on MPI_Comm 1073 1074 Input Parameters: 1075 + comm - MPI communicator, set to PETSC_COMM_SELF 1076 . m - number of rows 1077 . n - number of columns 1078 . nz - number of nonzeros per row (same for all rows) 1079 - nnz - array containing the number of nonzeros in the various rows 1080 (possibly different for each row) or NULL 1081 1082 Output Parameter: 1083 . A - the matrix 1084 1085 Options Database Keys: 1086 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1087 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1088 1089 Notes: 1090 If nnz is given then nz is ignored 1091 1092 Level: intermediate 1093 1094 .keywords: matrix, MKL, sparse, parallel 1095 1096 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1097 @*/ 1098 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1099 { 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1104 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1105 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1106 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1111 { 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1116 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119