1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 54 B->ops->scale = MatScale_SeqAIJ; 55 B->ops->diagonalscale = MatDiagonalScale_SeqAIJ; 56 B->ops->diagonalset = MatDiagonalSet_SeqAIJ; 57 B->ops->axpy = MatAXPY_SeqAIJ; 58 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 64 if(!aijmkl->no_SpMV2) { 65 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 #endif 69 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 70 } 71 72 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 73 * simply involves destroying the MKL sparse matrix handle and then freeing 74 * the spptr pointer. */ 75 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 76 77 if (aijmkl->sparse_optimized) { 78 sparse_status_t stat; 79 stat = mkl_sparse_destroy(aijmkl->csrA); 80 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 81 } 82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 83 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 84 85 /* Change the type of B to MATSEQAIJ. */ 86 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 87 88 *newmat = B; 89 PetscFunctionReturn(0); 90 } 91 92 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 93 { 94 PetscErrorCode ierr; 95 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 96 97 PetscFunctionBegin; 98 99 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 100 * spptr pointer. */ 101 if (aijmkl) { 102 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 103 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 104 if (aijmkl->sparse_optimized) { 105 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 106 stat = mkl_sparse_destroy(aijmkl->csrA); 107 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 108 } 109 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 110 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 111 } 112 113 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 114 * to destroy everything that remains. */ 115 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 116 /* Note that I don't call MatSetType(). I believe this is because that 117 * is only to be called when *building* a matrix. I could be wrong, but 118 * that is how things work for the SuperLU matrix class. */ 119 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 124 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 125 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 126 * handle, creates a new one, and then calls mkl_sparse_optimize(). 127 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 128 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 129 * an unoptimized matrix handle here. */ 130 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 131 { 132 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 133 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 134 * does nothing. We make it callable anyway in this case because it cuts 135 * down on littering the code with #ifdefs. */ 136 PetscFunctionBegin; 137 PetscFunctionReturn(0); 138 #else 139 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 140 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 141 PetscInt m,n; 142 MatScalar *aa; 143 PetscInt *aj,*ai; 144 sparse_status_t stat; 145 PetscErrorCode ierr; 146 PetscObjectState state; 147 148 PetscFunctionBegin; 149 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 150 151 if (aijmkl->sparse_optimized) { 152 /* Matrix has been previously assembled and optimized. Must destroy old 153 * matrix handle before running the optimization step again. */ 154 stat = mkl_sparse_destroy(aijmkl->csrA); 155 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 156 } 157 aijmkl->sparse_optimized = PETSC_FALSE; 158 159 /* Now perform the SpMV2 setup and matrix optimization. */ 160 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 161 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 162 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 163 m = A->rmap->n; 164 n = A->cmap->n; 165 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 166 aa = a->a; /* Nonzero elements stored row-by-row. */ 167 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 168 if ((a->nz!=0) & !(A->structure_only)) { 169 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 170 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 171 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 172 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 173 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 174 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 175 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 176 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 177 stat = mkl_sparse_optimize(aijmkl->csrA); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 179 aijmkl->sparse_optimized = PETSC_TRUE; 180 } 181 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 182 183 PetscFunctionReturn(0); 184 #endif 185 } 186 187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 188 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 189 * matrix handle. 190 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 191 * there is no good alternative. */ 192 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 193 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 194 { 195 PetscErrorCode ierr; 196 sparse_status_t stat; 197 sparse_index_base_t indexing; 198 PetscInt nrows, ncols; 199 PetscInt *aj,*ai,*dummy; 200 MatScalar *aa; 201 Mat A; 202 Mat_SeqAIJMKL *aijmkl; 203 204 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 205 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 206 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 207 208 if (reuse == MAT_REUSE_MATRIX) { 209 ierr = MatDestroy(mat);CHKERRQ(ierr); 210 } 211 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 212 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 213 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 214 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 215 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 216 * they will be destroyed when the MKL handle is destroyed. 217 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 218 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 219 220 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 221 * Now turn it into a MATSEQAIJMKL. */ 222 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 223 224 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 225 aijmkl->csrA = csrA; 226 227 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 228 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 229 * and just need to be able to run the MKL optimization step. */ 230 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 231 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 232 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 233 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 234 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 235 stat = mkl_sparse_optimize(aijmkl->csrA); 236 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 237 aijmkl->sparse_optimized = PETSC_TRUE; 238 239 *mat = A; 240 PetscFunctionReturn(0); 241 } 242 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 243 244 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 245 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 246 * MatMatMultNumeric(). */ 247 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 248 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 249 { 250 PetscInt i; 251 PetscInt nrows,ncols; 252 PetscInt nz; 253 PetscInt *ai,*aj,*dummy; 254 PetscScalar *aa; 255 PetscErrorCode ierr; 256 Mat_SeqAIJMKL *aijmkl; 257 sparse_status_t stat; 258 sparse_index_base_t indexing; 259 260 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 261 262 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 263 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 264 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 265 266 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 267 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 268 for (i=0; i<nrows; i++) { 269 nz = ai[i+1] - ai[i]; 270 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 271 } 272 273 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275 276 PetscFunctionReturn(0); 277 } 278 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 279 280 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 281 { 282 PetscErrorCode ierr; 283 Mat_SeqAIJMKL *aijmkl; 284 Mat_SeqAIJMKL *aijmkl_dest; 285 286 PetscFunctionBegin; 287 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 288 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 289 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 290 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 291 aijmkl_dest->sparse_optimized = PETSC_FALSE; 292 if (aijmkl->eager_inspection) { 293 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 294 } 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 299 { 300 PetscErrorCode ierr; 301 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 302 Mat_SeqAIJMKL *aijmkl; 303 304 PetscFunctionBegin; 305 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 306 307 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 308 * extra information and some different methods, call the AssemblyEnd 309 * routine for a MATSEQAIJ. 310 * I'm not sure if this is the best way to do this, but it avoids 311 * a lot of code duplication. */ 312 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 313 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 314 315 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 316 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 317 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 318 if (aijmkl->eager_inspection) { 319 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 320 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 321 } else if (aijmkl->sparse_optimized) { 322 /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be 323 * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations 324 * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt. 325 * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */ 326 sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA); 327 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 328 aijmkl->sparse_optimized = PETSC_FALSE; 329 #endif 330 } 331 332 PetscFunctionReturn(0); 333 } 334 335 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 336 { 337 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 338 const PetscScalar *x; 339 PetscScalar *y; 340 const MatScalar *aa; 341 PetscErrorCode ierr; 342 PetscInt m=A->rmap->n; 343 PetscInt n=A->cmap->n; 344 PetscScalar alpha = 1.0; 345 PetscScalar beta = 0.0; 346 const PetscInt *aj,*ai; 347 char matdescra[6]; 348 349 350 /* Variables not in MatMult_SeqAIJ. */ 351 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 352 353 PetscFunctionBegin; 354 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 355 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 356 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 359 aa = a->a; /* Nonzero elements stored row-by-row. */ 360 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 361 362 /* Call MKL sparse BLAS routine to do the MatMult. */ 363 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 364 365 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 366 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 367 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 372 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 373 { 374 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 375 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 376 const PetscScalar *x; 377 PetscScalar *y; 378 PetscErrorCode ierr; 379 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 380 PetscObjectState state; 381 382 PetscFunctionBegin; 383 384 /* If there are no nonzero entries, zero yy and return immediately. */ 385 if(!a->nz) { 386 PetscInt i; 387 PetscInt m=A->rmap->n; 388 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 389 for (i=0; i<m; i++) { 390 y[i] = 0.0; 391 } 392 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 397 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 398 399 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 400 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 401 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 402 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 403 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 404 MatSeqAIJMKL_create_mkl_handle(A); 405 } 406 407 /* Call MKL SpMV2 executor routine to do the MatMult. */ 408 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 409 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 410 411 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 413 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 417 418 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 419 { 420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 421 const PetscScalar *x; 422 PetscScalar *y; 423 const MatScalar *aa; 424 PetscErrorCode ierr; 425 PetscInt m=A->rmap->n; 426 PetscInt n=A->cmap->n; 427 PetscScalar alpha = 1.0; 428 PetscScalar beta = 0.0; 429 const PetscInt *aj,*ai; 430 char matdescra[6]; 431 432 /* Variables not in MatMultTranspose_SeqAIJ. */ 433 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 434 435 PetscFunctionBegin; 436 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 437 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 438 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 439 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 440 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 441 aa = a->a; /* Nonzero elements stored row-by-row. */ 442 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 443 444 /* Call MKL sparse BLAS routine to do the MatMult. */ 445 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 446 447 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 449 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 454 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 455 { 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 457 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 458 const PetscScalar *x; 459 PetscScalar *y; 460 PetscErrorCode ierr; 461 sparse_status_t stat; 462 PetscObjectState state; 463 464 PetscFunctionBegin; 465 466 /* If there are no nonzero entries, zero yy and return immediately. */ 467 if(!a->nz) { 468 PetscInt i; 469 PetscInt n=A->cmap->n; 470 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 471 for (i=0; i<n; i++) { 472 y[i] = 0.0; 473 } 474 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 479 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 480 481 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 482 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 483 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 484 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 485 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 486 MatSeqAIJMKL_create_mkl_handle(A); 487 } 488 489 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 490 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 491 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 492 493 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 499 500 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 501 { 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 503 const PetscScalar *x; 504 PetscScalar *y,*z; 505 const MatScalar *aa; 506 PetscErrorCode ierr; 507 PetscInt m=A->rmap->n; 508 PetscInt n=A->cmap->n; 509 const PetscInt *aj,*ai; 510 PetscInt i; 511 512 /* Variables not in MatMultAdd_SeqAIJ. */ 513 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 514 PetscScalar alpha = 1.0; 515 PetscScalar beta; 516 char matdescra[6]; 517 518 PetscFunctionBegin; 519 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 520 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 521 522 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 523 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 524 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 525 aa = a->a; /* Nonzero elements stored row-by-row. */ 526 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 527 528 /* Call MKL sparse BLAS routine to do the MatMult. */ 529 if (zz == yy) { 530 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 531 beta = 1.0; 532 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 533 } else { 534 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 535 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 536 beta = 0.0; 537 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 538 for (i=0; i<m; i++) { 539 z[i] += y[i]; 540 } 541 } 542 543 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 544 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 545 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 546 PetscFunctionReturn(0); 547 } 548 549 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 550 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 551 { 552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 553 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 554 const PetscScalar *x; 555 PetscScalar *y,*z; 556 PetscErrorCode ierr; 557 PetscInt m=A->rmap->n; 558 PetscInt i; 559 560 /* Variables not in MatMultAdd_SeqAIJ. */ 561 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 562 PetscObjectState state; 563 564 PetscFunctionBegin; 565 566 /* If there are no nonzero entries, set zz = yy and return immediately. */ 567 if(!a->nz) { 568 PetscInt i; 569 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 570 for (i=0; i<m; i++) { 571 z[i] = y[i]; 572 } 573 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 574 PetscFunctionReturn(0); 575 } 576 577 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 578 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 579 580 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 581 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 582 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 583 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 584 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 585 MatSeqAIJMKL_create_mkl_handle(A); 586 } 587 588 /* Call MKL sparse BLAS routine to do the MatMult. */ 589 if (zz == yy) { 590 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 591 * with alpha and beta both set to 1.0. */ 592 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 593 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 594 } else { 595 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 596 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 597 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 598 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 599 for (i=0; i<m; i++) { 600 z[i] += y[i]; 601 } 602 } 603 604 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 605 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 606 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 607 PetscFunctionReturn(0); 608 } 609 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 610 611 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 612 { 613 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 614 const PetscScalar *x; 615 PetscScalar *y,*z; 616 const MatScalar *aa; 617 PetscErrorCode ierr; 618 PetscInt m=A->rmap->n; 619 PetscInt n=A->cmap->n; 620 const PetscInt *aj,*ai; 621 PetscInt i; 622 623 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 624 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 625 PetscScalar alpha = 1.0; 626 PetscScalar beta; 627 char matdescra[6]; 628 629 PetscFunctionBegin; 630 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 631 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 632 633 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 634 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 635 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 636 aa = a->a; /* Nonzero elements stored row-by-row. */ 637 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 638 639 /* Call MKL sparse BLAS routine to do the MatMult. */ 640 if (zz == yy) { 641 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 642 beta = 1.0; 643 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 644 } else { 645 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 646 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 647 beta = 0.0; 648 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 649 for (i=0; i<n; i++) { 650 z[i] += y[i]; 651 } 652 } 653 654 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 655 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 656 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 662 { 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 664 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 665 const PetscScalar *x; 666 PetscScalar *y,*z; 667 PetscErrorCode ierr; 668 PetscInt n=A->cmap->n; 669 PetscInt i; 670 PetscObjectState state; 671 672 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 673 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 674 675 PetscFunctionBegin; 676 677 /* If there are no nonzero entries, set zz = yy and return immediately. */ 678 if(!a->nz) { 679 PetscInt i; 680 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 681 for (i=0; i<n; i++) { 682 z[i] = y[i]; 683 } 684 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 689 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 690 691 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 692 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 693 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 694 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 695 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 696 MatSeqAIJMKL_create_mkl_handle(A); 697 } 698 699 /* Call MKL sparse BLAS routine to do the MatMult. */ 700 if (zz == yy) { 701 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 702 * with alpha and beta both set to 1.0. */ 703 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 704 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 705 } else { 706 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 707 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 708 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 709 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 710 for (i=0; i<n; i++) { 711 z[i] += y[i]; 712 } 713 } 714 715 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 716 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 717 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 718 PetscFunctionReturn(0); 719 } 720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 721 722 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 723 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 724 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 725 * For releases of MKL prior to version 18, update 2: 726 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 727 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 728 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 729 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 730 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 731 { 732 Mat_SeqAIJMKL *a, *b; 733 sparse_matrix_t csrA, csrB, csrC; 734 PetscErrorCode ierr; 735 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 736 PetscObjectState state; 737 738 PetscFunctionBegin; 739 a = (Mat_SeqAIJMKL*)A->spptr; 740 b = (Mat_SeqAIJMKL*)B->spptr; 741 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 742 if (!a->sparse_optimized || a->state != state) { 743 MatSeqAIJMKL_create_mkl_handle(A); 744 } 745 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 746 if (!b->sparse_optimized || b->state != state) { 747 MatSeqAIJMKL_create_mkl_handle(B); 748 } 749 csrA = a->csrA; 750 csrB = b->csrA; 751 752 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 753 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 754 755 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 756 757 PetscFunctionReturn(0); 758 } 759 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 760 761 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 762 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 763 { 764 Mat_SeqAIJMKL *a, *b, *c; 765 sparse_matrix_t csrA, csrB, csrC; 766 PetscErrorCode ierr; 767 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 768 struct matrix_descr descr_type_gen; 769 PetscObjectState state; 770 771 PetscFunctionBegin; 772 a = (Mat_SeqAIJMKL*)A->spptr; 773 b = (Mat_SeqAIJMKL*)B->spptr; 774 c = (Mat_SeqAIJMKL*)C->spptr; 775 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 776 if (!a->sparse_optimized || a->state != state) { 777 MatSeqAIJMKL_create_mkl_handle(A); 778 } 779 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 780 if (!b->sparse_optimized || b->state != state) { 781 MatSeqAIJMKL_create_mkl_handle(B); 782 } 783 csrA = a->csrA; 784 csrB = b->csrA; 785 csrC = c->csrA; 786 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 787 788 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 789 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 790 SPARSE_STAGE_FINALIZE_MULT,&csrC); 791 792 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 793 794 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 795 ierr = MatSeqAIJMKL_update_from_mkl_handle(A);CHKERRQ(ierr); 796 797 PetscFunctionReturn(0); 798 } 799 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 800 801 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 802 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 803 { 804 Mat_SeqAIJMKL *a, *b; 805 sparse_matrix_t csrA, csrB, csrC; 806 PetscErrorCode ierr; 807 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 808 PetscObjectState state; 809 810 PetscFunctionBegin; 811 a = (Mat_SeqAIJMKL*)A->spptr; 812 b = (Mat_SeqAIJMKL*)B->spptr; 813 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 814 if (!a->sparse_optimized || a->state != state) { 815 MatSeqAIJMKL_create_mkl_handle(A); 816 } 817 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 818 if (!b->sparse_optimized || b->state != state) { 819 MatSeqAIJMKL_create_mkl_handle(B); 820 } 821 csrA = a->csrA; 822 csrB = b->csrA; 823 824 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 825 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 826 827 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 828 829 PetscFunctionReturn(0); 830 } 831 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 832 833 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha) 834 { 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr); 839 ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 843 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr) 844 { 845 PetscErrorCode ierr; 846 847 PetscFunctionBegin; 848 ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr); 849 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 850 PetscFunctionReturn(0); 851 } 852 853 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is) 854 { 855 PetscErrorCode ierr; 856 857 PetscFunctionBegin; 858 ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr); 859 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str) 864 { 865 PetscErrorCode ierr; 866 867 PetscFunctionBegin; 868 ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr); 869 if (str == SAME_NONZERO_PATTERN) { 870 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 871 ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 877 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 878 * routine, but can also be used to convert an assembled SeqAIJ matrix 879 * into a SeqAIJMKL one. */ 880 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 881 { 882 PetscErrorCode ierr; 883 Mat B = *newmat; 884 Mat_SeqAIJMKL *aijmkl; 885 PetscBool set; 886 PetscBool sametype; 887 888 PetscFunctionBegin; 889 if (reuse == MAT_INITIAL_MATRIX) { 890 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 891 } 892 893 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 894 if (sametype) PetscFunctionReturn(0); 895 896 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 897 B->spptr = (void*) aijmkl; 898 899 /* Set function pointers for methods that we inherit from AIJ but override. 900 * We also parse some command line options below, since those determine some of the methods we point to. */ 901 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 902 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 903 B->ops->destroy = MatDestroy_SeqAIJMKL; 904 905 aijmkl->sparse_optimized = PETSC_FALSE; 906 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 907 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 908 #else 909 aijmkl->no_SpMV2 = PETSC_TRUE; 910 #endif 911 aijmkl->eager_inspection = PETSC_FALSE; 912 913 /* Parse command line options. */ 914 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 915 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 916 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 917 ierr = PetscOptionsEnd();CHKERRQ(ierr); 918 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 919 if(!aijmkl->no_SpMV2) { 920 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 921 aijmkl->no_SpMV2 = PETSC_TRUE; 922 } 923 #endif 924 925 if(!aijmkl->no_SpMV2) { 926 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 927 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 928 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 929 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 930 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 931 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 932 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 933 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 934 #endif 935 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 936 #endif 937 } else { 938 B->ops->mult = MatMult_SeqAIJMKL; 939 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 940 B->ops->multadd = MatMultAdd_SeqAIJMKL; 941 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 942 } 943 944 B->ops->scale = MatScale_SeqAIJMKL; 945 B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL; 946 B->ops->diagonalset = MatDiagonalSet_SeqAIJMKL; 947 B->ops->axpy = MatAXPY_SeqAIJMKL; 948 949 ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr); 950 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 951 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 952 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 953 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 954 if(!aijmkl->no_SpMV2) { 955 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 956 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 957 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 958 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 959 #endif 960 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 961 #endif 962 } 963 964 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 965 *newmat = B; 966 PetscFunctionReturn(0); 967 } 968 969 /*@C 970 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 971 This type inherits from AIJ and is largely identical, but uses sparse BLAS 972 routines from Intel MKL whenever possible. 973 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 974 operations are currently supported. 975 If the installed version of MKL supports the "SpMV2" sparse 976 inspector-executor routines, then those are used by default. 977 978 Collective on MPI_Comm 979 980 Input Parameters: 981 + comm - MPI communicator, set to PETSC_COMM_SELF 982 . m - number of rows 983 . n - number of columns 984 . nz - number of nonzeros per row (same for all rows) 985 - nnz - array containing the number of nonzeros in the various rows 986 (possibly different for each row) or NULL 987 988 Output Parameter: 989 . A - the matrix 990 991 Options Database Keys: 992 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 993 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 994 995 Notes: 996 If nnz is given then nz is ignored 997 998 Level: intermediate 999 1000 .keywords: matrix, MKL, sparse, parallel 1001 1002 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1003 @*/ 1004 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1005 { 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1010 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1011 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1012 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1017 { 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1022 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1023 PetscFunctionReturn(0); 1024 } 1025