1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if(!aijmkl->no_SpMV2) { 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 65 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66 #endif 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 } 69 70 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 71 * simply involves destroying the MKL sparse matrix handle and then freeing 72 * the spptr pointer. */ 73 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 74 75 if (aijmkl->sparse_optimized) { 76 sparse_status_t stat; 77 stat = mkl_sparse_destroy(aijmkl->csrA); 78 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 79 } 80 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 82 83 /* Change the type of B to MATSEQAIJ. */ 84 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 85 86 *newmat = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 94 95 PetscFunctionBegin; 96 97 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98 * spptr pointer. */ 99 if (aijmkl) { 100 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 101 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 102 if (aijmkl->sparse_optimized) { 103 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 104 stat = mkl_sparse_destroy(aijmkl->csrA); 105 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 106 } 107 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 108 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 109 } 110 111 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 112 * to destroy everything that remains. */ 113 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 114 /* Note that I don't call MatSetType(). I believe this is because that 115 * is only to be called when *building* a matrix. I could be wrong, but 116 * that is how things work for the SuperLU matrix class. */ 117 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 122 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 123 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 124 * handle, creates a new one, and then calls mkl_sparse_optimize(). 125 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 126 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 127 * an unoptimized matrix handle here. */ 128 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 129 { 130 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 131 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 132 * does nothing. We make it callable anyway in this case because it cuts 133 * down on littering the code with #ifdefs. */ 134 PetscFunctionBegin; 135 PetscFunctionReturn(0); 136 #else 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 138 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 139 PetscInt m,n; 140 MatScalar *aa; 141 PetscInt *aj,*ai; 142 sparse_status_t stat; 143 PetscErrorCode ierr; 144 145 PetscFunctionBegin; 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 148 if (aijmkl->sparse_optimized) { 149 /* Matrix has been previously assembled and optimized. Must destroy old 150 * matrix handle before running the optimization step again. */ 151 stat = mkl_sparse_destroy(aijmkl->csrA); 152 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 153 } 154 aijmkl->sparse_optimized = PETSC_FALSE; 155 156 /* Now perform the SpMV2 setup and matrix optimization. */ 157 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 158 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 159 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 160 m = A->rmap->n; 161 n = A->cmap->n; 162 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 163 aa = a->a; /* Nonzero elements stored row-by-row. */ 164 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 165 if ((a->nz!=0) & !(A->structure_only)) { 166 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 167 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 168 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 169 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 170 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 172 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 174 if (!aijmkl->no_SpMV2) { 175 stat = mkl_sparse_optimize(aijmkl->csrA); 176 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 177 } 178 aijmkl->sparse_optimized = PETSC_TRUE; 179 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 180 } 181 182 PetscFunctionReturn(0); 183 #endif 184 } 185 186 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 187 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 188 * matrix handle. 189 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 190 * there is no good alternative. */ 191 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 192 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 193 { 194 PetscErrorCode ierr; 195 sparse_status_t stat; 196 sparse_index_base_t indexing; 197 PetscInt nrows, ncols; 198 PetscInt *aj,*ai,*dummy; 199 MatScalar *aa; 200 Mat A; 201 Mat_SeqAIJMKL *aijmkl; 202 203 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 204 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 205 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 206 207 if (reuse == MAT_REUSE_MATRIX) { 208 ierr = MatDestroy(mat);CHKERRQ(ierr); 209 } 210 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 211 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 212 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 213 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 214 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 215 * they will be destroyed when the MKL handle is destroyed. 216 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 217 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 218 219 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 220 * Now turn it into a MATSEQAIJMKL. */ 221 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 222 223 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 224 aijmkl->csrA = csrA; 225 226 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 227 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 228 * and just need to be able to run the MKL optimization step. */ 229 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 230 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 231 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 232 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 233 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 234 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 235 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 236 if (!aijmkl->no_SpMV2) { 237 stat = mkl_sparse_optimize(aijmkl->csrA); 238 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 239 } 240 aijmkl->sparse_optimized = PETSC_TRUE; 241 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 242 243 *mat = A; 244 PetscFunctionReturn(0); 245 } 246 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 247 248 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 249 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 250 * MatMatMultNumeric(). */ 251 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 252 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 253 { 254 PetscInt i; 255 PetscInt nrows,ncols; 256 PetscInt nz; 257 PetscInt *ai,*aj,*dummy; 258 PetscScalar *aa; 259 PetscErrorCode ierr; 260 Mat_SeqAIJMKL *aijmkl; 261 sparse_status_t stat; 262 sparse_index_base_t indexing; 263 264 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 265 266 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 267 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 268 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 269 270 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 271 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 272 for (i=0; i<nrows; i++) { 273 nz = ai[i+1] - ai[i]; 274 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 275 } 276 277 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 278 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 279 280 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 281 /* We mark our matrix as having a valid, optimized MKL handle. 282 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 283 aijmkl->sparse_optimized = PETSC_TRUE; 284 285 PetscFunctionReturn(0); 286 } 287 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 288 289 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 290 { 291 PetscErrorCode ierr; 292 Mat_SeqAIJMKL *aijmkl; 293 Mat_SeqAIJMKL *aijmkl_dest; 294 295 PetscFunctionBegin; 296 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 297 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 298 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 299 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 300 aijmkl_dest->sparse_optimized = PETSC_FALSE; 301 if (aijmkl->eager_inspection) { 302 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 303 } 304 PetscFunctionReturn(0); 305 } 306 307 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 308 { 309 PetscErrorCode ierr; 310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 311 Mat_SeqAIJMKL *aijmkl; 312 313 PetscFunctionBegin; 314 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 315 316 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 317 * extra information and some different methods, call the AssemblyEnd 318 * routine for a MATSEQAIJ. 319 * I'm not sure if this is the best way to do this, but it avoids 320 * a lot of code duplication. */ 321 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 322 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 323 324 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 325 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 326 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 327 if (aijmkl->eager_inspection) { 328 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 329 } 330 331 PetscFunctionReturn(0); 332 } 333 334 #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 335 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 336 { 337 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 338 const PetscScalar *x; 339 PetscScalar *y; 340 const MatScalar *aa; 341 PetscErrorCode ierr; 342 PetscInt m=A->rmap->n; 343 PetscInt n=A->cmap->n; 344 PetscScalar alpha = 1.0; 345 PetscScalar beta = 0.0; 346 const PetscInt *aj,*ai; 347 char matdescra[6]; 348 349 350 /* Variables not in MatMult_SeqAIJ. */ 351 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 352 353 PetscFunctionBegin; 354 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 355 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 356 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 359 aa = a->a; /* Nonzero elements stored row-by-row. */ 360 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 361 362 /* Call MKL sparse BLAS routine to do the MatMult. */ 363 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 364 365 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 366 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 367 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 #endif 371 372 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 373 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 374 { 375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 376 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 377 const PetscScalar *x; 378 PetscScalar *y; 379 PetscErrorCode ierr; 380 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 381 PetscObjectState state; 382 383 PetscFunctionBegin; 384 385 /* If there are no nonzero entries, zero yy and return immediately. */ 386 if(!a->nz) { 387 PetscInt i; 388 PetscInt m=A->rmap->n; 389 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 390 for (i=0; i<m; i++) { 391 y[i] = 0.0; 392 } 393 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 398 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 399 400 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 401 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 402 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 403 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 404 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 405 MatSeqAIJMKL_create_mkl_handle(A); 406 } 407 408 /* Call MKL SpMV2 executor routine to do the MatMult. */ 409 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 410 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 411 412 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 414 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 418 419 #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 420 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 421 { 422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 423 const PetscScalar *x; 424 PetscScalar *y; 425 const MatScalar *aa; 426 PetscErrorCode ierr; 427 PetscInt m=A->rmap->n; 428 PetscInt n=A->cmap->n; 429 PetscScalar alpha = 1.0; 430 PetscScalar beta = 0.0; 431 const PetscInt *aj,*ai; 432 char matdescra[6]; 433 434 /* Variables not in MatMultTranspose_SeqAIJ. */ 435 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 436 437 PetscFunctionBegin; 438 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 439 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 440 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 441 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 442 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 443 aa = a->a; /* Nonzero elements stored row-by-row. */ 444 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 445 446 /* Call MKL sparse BLAS routine to do the MatMult. */ 447 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 448 449 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 451 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 #endif 455 456 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 457 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 458 { 459 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 460 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 461 const PetscScalar *x; 462 PetscScalar *y; 463 PetscErrorCode ierr; 464 sparse_status_t stat; 465 PetscObjectState state; 466 467 PetscFunctionBegin; 468 469 /* If there are no nonzero entries, zero yy and return immediately. */ 470 if(!a->nz) { 471 PetscInt i; 472 PetscInt n=A->cmap->n; 473 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 474 for (i=0; i<n; i++) { 475 y[i] = 0.0; 476 } 477 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 482 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 483 484 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 485 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 486 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 487 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 488 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 489 MatSeqAIJMKL_create_mkl_handle(A); 490 } 491 492 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 493 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 494 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 495 496 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 498 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 502 503 #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 504 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 505 { 506 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 507 const PetscScalar *x; 508 PetscScalar *y,*z; 509 const MatScalar *aa; 510 PetscErrorCode ierr; 511 PetscInt m=A->rmap->n; 512 PetscInt n=A->cmap->n; 513 const PetscInt *aj,*ai; 514 PetscInt i; 515 516 /* Variables not in MatMultAdd_SeqAIJ. */ 517 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 518 PetscScalar alpha = 1.0; 519 PetscScalar beta; 520 char matdescra[6]; 521 522 PetscFunctionBegin; 523 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 524 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 525 526 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 527 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 528 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 529 aa = a->a; /* Nonzero elements stored row-by-row. */ 530 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 531 532 /* Call MKL sparse BLAS routine to do the MatMult. */ 533 if (zz == yy) { 534 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 535 beta = 1.0; 536 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 537 } else { 538 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 539 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 540 beta = 0.0; 541 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 542 for (i=0; i<m; i++) { 543 z[i] += y[i]; 544 } 545 } 546 547 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 548 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 549 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 550 PetscFunctionReturn(0); 551 } 552 #endif 553 554 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 555 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 556 { 557 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 558 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 559 const PetscScalar *x; 560 PetscScalar *y,*z; 561 PetscErrorCode ierr; 562 PetscInt m=A->rmap->n; 563 PetscInt i; 564 565 /* Variables not in MatMultAdd_SeqAIJ. */ 566 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 567 PetscObjectState state; 568 569 PetscFunctionBegin; 570 571 /* If there are no nonzero entries, set zz = yy and return immediately. */ 572 if(!a->nz) { 573 PetscInt i; 574 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 575 for (i=0; i<m; i++) { 576 z[i] = y[i]; 577 } 578 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 583 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 584 585 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 586 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 587 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 588 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 589 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 590 MatSeqAIJMKL_create_mkl_handle(A); 591 } 592 593 /* Call MKL sparse BLAS routine to do the MatMult. */ 594 if (zz == yy) { 595 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 596 * with alpha and beta both set to 1.0. */ 597 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 598 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 599 } else { 600 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 601 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 602 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 603 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 604 for (i=0; i<m; i++) { 605 z[i] += y[i]; 606 } 607 } 608 609 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 610 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 611 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 612 PetscFunctionReturn(0); 613 } 614 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 615 616 #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 617 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 618 { 619 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 620 const PetscScalar *x; 621 PetscScalar *y,*z; 622 const MatScalar *aa; 623 PetscErrorCode ierr; 624 PetscInt m=A->rmap->n; 625 PetscInt n=A->cmap->n; 626 const PetscInt *aj,*ai; 627 PetscInt i; 628 629 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 630 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 631 PetscScalar alpha = 1.0; 632 PetscScalar beta; 633 char matdescra[6]; 634 635 PetscFunctionBegin; 636 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 637 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 638 639 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 640 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 641 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 642 aa = a->a; /* Nonzero elements stored row-by-row. */ 643 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 644 645 /* Call MKL sparse BLAS routine to do the MatMult. */ 646 if (zz == yy) { 647 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 648 beta = 1.0; 649 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 650 } else { 651 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 652 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 653 beta = 0.0; 654 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 655 for (i=0; i<n; i++) { 656 z[i] += y[i]; 657 } 658 } 659 660 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 661 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 #endif 666 667 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 668 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 669 { 670 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 671 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 672 const PetscScalar *x; 673 PetscScalar *y,*z; 674 PetscErrorCode ierr; 675 PetscInt n=A->cmap->n; 676 PetscInt i; 677 PetscObjectState state; 678 679 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 680 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 681 682 PetscFunctionBegin; 683 684 /* If there are no nonzero entries, set zz = yy and return immediately. */ 685 if(!a->nz) { 686 PetscInt i; 687 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 688 for (i=0; i<n; i++) { 689 z[i] = y[i]; 690 } 691 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 696 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 697 698 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 699 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 700 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 701 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 702 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 703 MatSeqAIJMKL_create_mkl_handle(A); 704 } 705 706 /* Call MKL sparse BLAS routine to do the MatMult. */ 707 if (zz == yy) { 708 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 709 * with alpha and beta both set to 1.0. */ 710 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 711 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 712 } else { 713 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 714 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 715 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 716 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 717 for (i=0; i<n; i++) { 718 z[i] += y[i]; 719 } 720 } 721 722 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 723 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 724 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 728 729 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 730 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 731 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 732 * For releases of MKL prior to version 18, update 2: 733 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 734 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 735 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 736 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 737 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 738 { 739 Mat_SeqAIJMKL *a, *b; 740 sparse_matrix_t csrA, csrB, csrC; 741 PetscErrorCode ierr; 742 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 743 PetscObjectState state; 744 745 PetscFunctionBegin; 746 a = (Mat_SeqAIJMKL*)A->spptr; 747 b = (Mat_SeqAIJMKL*)B->spptr; 748 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 749 if (!a->sparse_optimized || a->state != state) { 750 MatSeqAIJMKL_create_mkl_handle(A); 751 } 752 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 753 if (!b->sparse_optimized || b->state != state) { 754 MatSeqAIJMKL_create_mkl_handle(B); 755 } 756 csrA = a->csrA; 757 csrB = b->csrA; 758 759 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 760 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 761 762 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 763 764 PetscFunctionReturn(0); 765 } 766 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 767 768 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 769 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 770 { 771 Mat_SeqAIJMKL *a, *b, *c; 772 sparse_matrix_t csrA, csrB, csrC; 773 PetscErrorCode ierr; 774 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 775 struct matrix_descr descr_type_gen; 776 PetscObjectState state; 777 778 PetscFunctionBegin; 779 a = (Mat_SeqAIJMKL*)A->spptr; 780 b = (Mat_SeqAIJMKL*)B->spptr; 781 c = (Mat_SeqAIJMKL*)C->spptr; 782 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 783 if (!a->sparse_optimized || a->state != state) { 784 MatSeqAIJMKL_create_mkl_handle(A); 785 } 786 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 787 if (!b->sparse_optimized || b->state != state) { 788 MatSeqAIJMKL_create_mkl_handle(B); 789 } 790 csrA = a->csrA; 791 csrB = b->csrA; 792 csrC = c->csrA; 793 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 794 795 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 796 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 797 SPARSE_STAGE_FINALIZE_MULT,&csrC); 798 799 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 800 801 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 802 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 803 804 PetscFunctionReturn(0); 805 } 806 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 807 808 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 809 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 810 { 811 Mat_SeqAIJMKL *a, *b; 812 sparse_matrix_t csrA, csrB, csrC; 813 PetscErrorCode ierr; 814 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 815 PetscObjectState state; 816 817 PetscFunctionBegin; 818 a = (Mat_SeqAIJMKL*)A->spptr; 819 b = (Mat_SeqAIJMKL*)B->spptr; 820 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 821 if (!a->sparse_optimized || a->state != state) { 822 MatSeqAIJMKL_create_mkl_handle(A); 823 } 824 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 825 if (!b->sparse_optimized || b->state != state) { 826 MatSeqAIJMKL_create_mkl_handle(B); 827 } 828 csrA = a->csrA; 829 csrB = b->csrA; 830 831 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 832 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 833 834 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 835 836 PetscFunctionReturn(0); 837 } 838 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 839 840 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 841 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 842 { 843 Mat_SeqAIJMKL *a, *p, *c; 844 sparse_matrix_t csrA, csrP, csrC; 845 PetscBool set, flag; 846 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 847 struct matrix_descr descr_type_gen; 848 PetscObjectState state; 849 PetscErrorCode ierr; 850 851 PetscFunctionBegin; 852 ierr = MatIsSymmetricKnown(A,&set,&flag); 853 if (!set || (set && !flag)) { 854 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 a = (Mat_SeqAIJMKL*)A->spptr; 859 p = (Mat_SeqAIJMKL*)P->spptr; 860 c = (Mat_SeqAIJMKL*)C->spptr; 861 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 862 if (!a->sparse_optimized || a->state != state) { 863 MatSeqAIJMKL_create_mkl_handle(A); 864 } 865 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 866 if (!p->sparse_optimized || p->state != state) { 867 MatSeqAIJMKL_create_mkl_handle(P); 868 } 869 csrA = a->csrA; 870 csrP = p->csrA; 871 csrC = c->csrA; 872 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 873 874 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 875 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT); 876 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 877 878 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 879 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 880 881 PetscFunctionReturn(0); 882 } 883 #endif 884 885 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 886 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 887 { 888 Mat_SeqAIJMKL *a, *p; 889 sparse_matrix_t csrA, csrP, csrC; 890 PetscBool set, flag; 891 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 892 struct matrix_descr descr_type_gen; 893 PetscObjectState state; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = MatIsSymmetricKnown(A,&set,&flag); 898 if (!set || (set && !flag)) { 899 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 if (scall == MAT_REUSE_MATRIX) { 904 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 a = (Mat_SeqAIJMKL*)A->spptr; 909 p = (Mat_SeqAIJMKL*)P->spptr; 910 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 911 if (!a->sparse_optimized || a->state != state) { 912 MatSeqAIJMKL_create_mkl_handle(A); 913 } 914 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 915 if (!p->sparse_optimized || p->state != state) { 916 MatSeqAIJMKL_create_mkl_handle(P); 917 } 918 csrA = a->csrA; 919 csrP = p->csrA; 920 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 921 922 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 923 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT); 924 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 925 926 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 927 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 928 929 PetscFunctionReturn(0); 930 } 931 #endif 932 933 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 934 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 935 * routine, but can also be used to convert an assembled SeqAIJ matrix 936 * into a SeqAIJMKL one. */ 937 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 938 { 939 PetscErrorCode ierr; 940 Mat B = *newmat; 941 Mat_SeqAIJMKL *aijmkl; 942 PetscBool set; 943 PetscBool sametype; 944 945 PetscFunctionBegin; 946 if (reuse == MAT_INITIAL_MATRIX) { 947 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 948 } 949 950 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 951 if (sametype) PetscFunctionReturn(0); 952 953 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 954 B->spptr = (void*) aijmkl; 955 956 /* Set function pointers for methods that we inherit from AIJ but override. 957 * We also parse some command line options below, since those determine some of the methods we point to. */ 958 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 959 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 960 B->ops->destroy = MatDestroy_SeqAIJMKL; 961 962 aijmkl->sparse_optimized = PETSC_FALSE; 963 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 964 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 965 #else 966 aijmkl->no_SpMV2 = PETSC_TRUE; 967 #endif 968 aijmkl->eager_inspection = PETSC_FALSE; 969 970 /* Parse command line options. */ 971 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 972 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 973 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 974 ierr = PetscOptionsEnd();CHKERRQ(ierr); 975 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 976 if(!aijmkl->no_SpMV2) { 977 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 978 aijmkl->no_SpMV2 = PETSC_TRUE; 979 } 980 #endif 981 982 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 983 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 984 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 985 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 986 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 987 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 988 # ifdef PETSC_HAVE_MKL_SPARSE_SP2M 989 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 990 # ifndef PETSC_USE_COMPLEX 991 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 992 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 993 # endif 994 # endif 995 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 996 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 997 998 #ifndef PETSC_HAVE_MKL_SPARSE_SP2M 999 /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were 1000 * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new 1001 * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the 1002 * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old 1003 * interface. */ 1004 if (aijmkl->no_SpMV2) { 1005 B->ops->mult = MatMult_SeqAIJMKL; 1006 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1007 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1008 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1009 } 1010 #endif 1011 1012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1016 if(!aijmkl->no_SpMV2) { 1017 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 1018 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1019 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1021 #endif 1022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1023 #endif 1024 } 1025 1026 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1027 *newmat = B; 1028 PetscFunctionReturn(0); 1029 } 1030 1031 /*@C 1032 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1033 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1034 routines from Intel MKL whenever possible. 1035 If the installed version of MKL supports the "SpMV2" sparse 1036 inspector-executor routines, then those are used by default. 1037 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1038 symmetric A) operations are currently supported. 1039 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1040 1041 Collective on MPI_Comm 1042 1043 Input Parameters: 1044 + comm - MPI communicator, set to PETSC_COMM_SELF 1045 . m - number of rows 1046 . n - number of columns 1047 . nz - number of nonzeros per row (same for all rows) 1048 - nnz - array containing the number of nonzeros in the various rows 1049 (possibly different for each row) or NULL 1050 1051 Output Parameter: 1052 . A - the matrix 1053 1054 Options Database Keys: 1055 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1056 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1057 1058 Notes: 1059 If nnz is given then nz is ignored 1060 1061 Level: intermediate 1062 1063 .keywords: matrix, MKL, sparse, parallel 1064 1065 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1066 @*/ 1067 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1068 { 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1073 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1074 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1075 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1080 { 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1085 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088