1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if(!aijmkl->no_SpMV2) { 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 65 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66 #endif 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 } 69 70 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 71 * simply involves destroying the MKL sparse matrix handle and then freeing 72 * the spptr pointer. */ 73 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 74 75 if (aijmkl->sparse_optimized) { 76 sparse_status_t stat; 77 stat = mkl_sparse_destroy(aijmkl->csrA); 78 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 79 } 80 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 82 83 /* Change the type of B to MATSEQAIJ. */ 84 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 85 86 *newmat = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 94 95 PetscFunctionBegin; 96 97 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98 * spptr pointer. */ 99 if (aijmkl) { 100 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 101 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 102 if (aijmkl->sparse_optimized) { 103 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 104 stat = mkl_sparse_destroy(aijmkl->csrA); 105 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 106 } 107 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 108 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 109 } 110 111 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 112 * to destroy everything that remains. */ 113 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 114 /* Note that I don't call MatSetType(). I believe this is because that 115 * is only to be called when *building* a matrix. I could be wrong, but 116 * that is how things work for the SuperLU matrix class. */ 117 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 122 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 123 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 124 * handle, creates a new one, and then calls mkl_sparse_optimize(). 125 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 126 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 127 * an unoptimized matrix handle here. */ 128 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 129 { 130 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 131 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 132 * does nothing. We make it callable anyway in this case because it cuts 133 * down on littering the code with #ifdefs. */ 134 PetscFunctionBegin; 135 PetscFunctionReturn(0); 136 #else 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 138 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 139 PetscInt m,n; 140 MatScalar *aa; 141 PetscInt *aj,*ai; 142 sparse_status_t stat; 143 PetscErrorCode ierr; 144 145 PetscFunctionBegin; 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 148 if (aijmkl->sparse_optimized) { 149 /* Matrix has been previously assembled and optimized. Must destroy old 150 * matrix handle before running the optimization step again. */ 151 stat = mkl_sparse_destroy(aijmkl->csrA); 152 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 153 } 154 aijmkl->sparse_optimized = PETSC_FALSE; 155 156 /* Now perform the SpMV2 setup and matrix optimization. */ 157 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 158 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 159 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 160 m = A->rmap->n; 161 n = A->cmap->n; 162 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 163 aa = a->a; /* Nonzero elements stored row-by-row. */ 164 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 165 if ((a->nz!=0) & !(A->structure_only)) { 166 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 167 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 168 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 169 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 170 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 172 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 174 stat = mkl_sparse_optimize(aijmkl->csrA); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 176 aijmkl->sparse_optimized = PETSC_TRUE; 177 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 178 } 179 180 PetscFunctionReturn(0); 181 #endif 182 } 183 184 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 185 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 186 * matrix handle. 187 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 188 * there is no good alternative. */ 189 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 190 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 191 { 192 PetscErrorCode ierr; 193 sparse_status_t stat; 194 sparse_index_base_t indexing; 195 PetscInt nrows, ncols; 196 PetscInt *aj,*ai,*dummy; 197 MatScalar *aa; 198 Mat A; 199 Mat_SeqAIJMKL *aijmkl; 200 201 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 202 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 203 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 204 205 if (reuse == MAT_REUSE_MATRIX) { 206 ierr = MatDestroy(mat);CHKERRQ(ierr); 207 } 208 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 209 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 210 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 211 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 212 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 213 * they will be destroyed when the MKL handle is destroyed. 214 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 215 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 216 217 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 218 * Now turn it into a MATSEQAIJMKL. */ 219 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 220 221 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 222 aijmkl->csrA = csrA; 223 224 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 225 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 226 * and just need to be able to run the MKL optimization step. */ 227 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 228 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 229 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 230 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 231 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 232 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 233 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 234 stat = mkl_sparse_optimize(aijmkl->csrA); 235 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 236 aijmkl->sparse_optimized = PETSC_TRUE; 237 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 238 239 *mat = A; 240 PetscFunctionReturn(0); 241 } 242 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 243 244 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 245 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 246 * MatMatMultNumeric(). */ 247 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 248 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 249 { 250 PetscInt i; 251 PetscInt nrows,ncols; 252 PetscInt nz; 253 PetscInt *ai,*aj,*dummy; 254 PetscScalar *aa; 255 PetscErrorCode ierr; 256 Mat_SeqAIJMKL *aijmkl; 257 sparse_status_t stat; 258 sparse_index_base_t indexing; 259 260 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 261 262 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 263 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 264 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 265 266 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 267 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 268 for (i=0; i<nrows; i++) { 269 nz = ai[i+1] - ai[i]; 270 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 271 } 272 273 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275 276 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 277 /* We mark our matrix as having a valid, optimized MKL handle. 278 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 279 aijmkl->sparse_optimized = PETSC_TRUE; 280 281 PetscFunctionReturn(0); 282 } 283 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 284 285 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 286 { 287 PetscErrorCode ierr; 288 Mat_SeqAIJMKL *aijmkl; 289 Mat_SeqAIJMKL *aijmkl_dest; 290 291 PetscFunctionBegin; 292 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 293 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 294 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 295 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 296 aijmkl_dest->sparse_optimized = PETSC_FALSE; 297 if (aijmkl->eager_inspection) { 298 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 299 } 300 PetscFunctionReturn(0); 301 } 302 303 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 304 { 305 PetscErrorCode ierr; 306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 307 Mat_SeqAIJMKL *aijmkl; 308 309 PetscFunctionBegin; 310 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 311 312 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 313 * extra information and some different methods, call the AssemblyEnd 314 * routine for a MATSEQAIJ. 315 * I'm not sure if this is the best way to do this, but it avoids 316 * a lot of code duplication. */ 317 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 318 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 319 320 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 321 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 322 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 323 if (aijmkl->eager_inspection) { 324 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 325 } 326 327 PetscFunctionReturn(0); 328 } 329 330 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 331 { 332 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 333 const PetscScalar *x; 334 PetscScalar *y; 335 const MatScalar *aa; 336 PetscErrorCode ierr; 337 PetscInt m=A->rmap->n; 338 PetscInt n=A->cmap->n; 339 PetscScalar alpha = 1.0; 340 PetscScalar beta = 0.0; 341 const PetscInt *aj,*ai; 342 char matdescra[6]; 343 344 345 /* Variables not in MatMult_SeqAIJ. */ 346 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 347 348 PetscFunctionBegin; 349 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 350 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 351 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 353 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 354 aa = a->a; /* Nonzero elements stored row-by-row. */ 355 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 356 357 /* Call MKL sparse BLAS routine to do the MatMult. */ 358 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 359 360 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 362 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 367 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 368 { 369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 370 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 371 const PetscScalar *x; 372 PetscScalar *y; 373 PetscErrorCode ierr; 374 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 375 PetscObjectState state; 376 377 PetscFunctionBegin; 378 379 /* If there are no nonzero entries, zero yy and return immediately. */ 380 if(!a->nz) { 381 PetscInt i; 382 PetscInt m=A->rmap->n; 383 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 384 for (i=0; i<m; i++) { 385 y[i] = 0.0; 386 } 387 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 392 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 393 394 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 395 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 396 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 397 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 398 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 399 MatSeqAIJMKL_create_mkl_handle(A); 400 } 401 402 /* Call MKL SpMV2 executor routine to do the MatMult. */ 403 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 404 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 405 406 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 408 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 412 413 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 414 { 415 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 416 const PetscScalar *x; 417 PetscScalar *y; 418 const MatScalar *aa; 419 PetscErrorCode ierr; 420 PetscInt m=A->rmap->n; 421 PetscInt n=A->cmap->n; 422 PetscScalar alpha = 1.0; 423 PetscScalar beta = 0.0; 424 const PetscInt *aj,*ai; 425 char matdescra[6]; 426 427 /* Variables not in MatMultTranspose_SeqAIJ. */ 428 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 429 430 PetscFunctionBegin; 431 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 432 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 433 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 434 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 435 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 436 aa = a->a; /* Nonzero elements stored row-by-row. */ 437 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 438 439 /* Call MKL sparse BLAS routine to do the MatMult. */ 440 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 441 442 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 443 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 449 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 450 { 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 452 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 453 const PetscScalar *x; 454 PetscScalar *y; 455 PetscErrorCode ierr; 456 sparse_status_t stat; 457 PetscObjectState state; 458 459 PetscFunctionBegin; 460 461 /* If there are no nonzero entries, zero yy and return immediately. */ 462 if(!a->nz) { 463 PetscInt i; 464 PetscInt n=A->cmap->n; 465 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 466 for (i=0; i<n; i++) { 467 y[i] = 0.0; 468 } 469 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 473 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 474 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 475 476 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 477 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 478 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 479 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 480 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 481 MatSeqAIJMKL_create_mkl_handle(A); 482 } 483 484 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 485 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 486 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 487 488 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 490 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 494 495 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 496 { 497 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 498 const PetscScalar *x; 499 PetscScalar *y,*z; 500 const MatScalar *aa; 501 PetscErrorCode ierr; 502 PetscInt m=A->rmap->n; 503 PetscInt n=A->cmap->n; 504 const PetscInt *aj,*ai; 505 PetscInt i; 506 507 /* Variables not in MatMultAdd_SeqAIJ. */ 508 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 509 PetscScalar alpha = 1.0; 510 PetscScalar beta; 511 char matdescra[6]; 512 513 PetscFunctionBegin; 514 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 515 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 516 517 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 518 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 519 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 520 aa = a->a; /* Nonzero elements stored row-by-row. */ 521 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 522 523 /* Call MKL sparse BLAS routine to do the MatMult. */ 524 if (zz == yy) { 525 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 526 beta = 1.0; 527 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 528 } else { 529 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 530 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 531 beta = 0.0; 532 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 533 for (i=0; i<m; i++) { 534 z[i] += y[i]; 535 } 536 } 537 538 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 539 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 540 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 545 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 546 { 547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 548 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 549 const PetscScalar *x; 550 PetscScalar *y,*z; 551 PetscErrorCode ierr; 552 PetscInt m=A->rmap->n; 553 PetscInt i; 554 555 /* Variables not in MatMultAdd_SeqAIJ. */ 556 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 557 PetscObjectState state; 558 559 PetscFunctionBegin; 560 561 /* If there are no nonzero entries, set zz = yy and return immediately. */ 562 if(!a->nz) { 563 PetscInt i; 564 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 565 for (i=0; i<m; i++) { 566 z[i] = y[i]; 567 } 568 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 574 575 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 576 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 577 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 578 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 579 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 580 MatSeqAIJMKL_create_mkl_handle(A); 581 } 582 583 /* Call MKL sparse BLAS routine to do the MatMult. */ 584 if (zz == yy) { 585 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 586 * with alpha and beta both set to 1.0. */ 587 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 588 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 589 } else { 590 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 591 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 592 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 593 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 594 for (i=0; i<m; i++) { 595 z[i] += y[i]; 596 } 597 } 598 599 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 600 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 601 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 605 606 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 607 { 608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 609 const PetscScalar *x; 610 PetscScalar *y,*z; 611 const MatScalar *aa; 612 PetscErrorCode ierr; 613 PetscInt m=A->rmap->n; 614 PetscInt n=A->cmap->n; 615 const PetscInt *aj,*ai; 616 PetscInt i; 617 618 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 619 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 620 PetscScalar alpha = 1.0; 621 PetscScalar beta; 622 char matdescra[6]; 623 624 PetscFunctionBegin; 625 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 626 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 627 628 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 629 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 630 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 631 aa = a->a; /* Nonzero elements stored row-by-row. */ 632 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 633 634 /* Call MKL sparse BLAS routine to do the MatMult. */ 635 if (zz == yy) { 636 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 637 beta = 1.0; 638 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 639 } else { 640 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 641 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 642 beta = 0.0; 643 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 644 for (i=0; i<n; i++) { 645 z[i] += y[i]; 646 } 647 } 648 649 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 650 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 651 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 656 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 659 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 660 const PetscScalar *x; 661 PetscScalar *y,*z; 662 PetscErrorCode ierr; 663 PetscInt n=A->cmap->n; 664 PetscInt i; 665 PetscObjectState state; 666 667 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 668 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 669 670 PetscFunctionBegin; 671 672 /* If there are no nonzero entries, set zz = yy and return immediately. */ 673 if(!a->nz) { 674 PetscInt i; 675 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 676 for (i=0; i<n; i++) { 677 z[i] = y[i]; 678 } 679 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 684 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 685 686 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 687 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 688 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 689 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 690 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 691 MatSeqAIJMKL_create_mkl_handle(A); 692 } 693 694 /* Call MKL sparse BLAS routine to do the MatMult. */ 695 if (zz == yy) { 696 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 697 * with alpha and beta both set to 1.0. */ 698 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 699 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 700 } else { 701 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 702 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 703 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 704 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 705 for (i=0; i<n; i++) { 706 z[i] += y[i]; 707 } 708 } 709 710 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 711 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 712 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 716 717 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 718 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 719 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 720 * For releases of MKL prior to version 18, update 2: 721 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 722 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 723 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 724 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 725 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 726 { 727 Mat_SeqAIJMKL *a, *b; 728 sparse_matrix_t csrA, csrB, csrC; 729 PetscErrorCode ierr; 730 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 731 PetscObjectState state; 732 733 PetscFunctionBegin; 734 a = (Mat_SeqAIJMKL*)A->spptr; 735 b = (Mat_SeqAIJMKL*)B->spptr; 736 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 737 if (!a->sparse_optimized || a->state != state) { 738 MatSeqAIJMKL_create_mkl_handle(A); 739 } 740 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 741 if (!b->sparse_optimized || b->state != state) { 742 MatSeqAIJMKL_create_mkl_handle(B); 743 } 744 csrA = a->csrA; 745 csrB = b->csrA; 746 747 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 748 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 749 750 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 751 752 PetscFunctionReturn(0); 753 } 754 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 755 756 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 757 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 758 { 759 Mat_SeqAIJMKL *a, *b, *c; 760 sparse_matrix_t csrA, csrB, csrC; 761 PetscErrorCode ierr; 762 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 763 struct matrix_descr descr_type_gen; 764 PetscObjectState state; 765 766 PetscFunctionBegin; 767 a = (Mat_SeqAIJMKL*)A->spptr; 768 b = (Mat_SeqAIJMKL*)B->spptr; 769 c = (Mat_SeqAIJMKL*)C->spptr; 770 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 771 if (!a->sparse_optimized || a->state != state) { 772 MatSeqAIJMKL_create_mkl_handle(A); 773 } 774 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 775 if (!b->sparse_optimized || b->state != state) { 776 MatSeqAIJMKL_create_mkl_handle(B); 777 } 778 csrA = a->csrA; 779 csrB = b->csrA; 780 csrC = c->csrA; 781 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 782 783 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 784 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 785 SPARSE_STAGE_FINALIZE_MULT,&csrC); 786 787 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 788 789 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 790 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 791 792 PetscFunctionReturn(0); 793 } 794 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 795 796 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 797 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 798 { 799 Mat_SeqAIJMKL *a, *b; 800 sparse_matrix_t csrA, csrB, csrC; 801 PetscErrorCode ierr; 802 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 803 PetscObjectState state; 804 805 PetscFunctionBegin; 806 a = (Mat_SeqAIJMKL*)A->spptr; 807 b = (Mat_SeqAIJMKL*)B->spptr; 808 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 809 if (!a->sparse_optimized || a->state != state) { 810 MatSeqAIJMKL_create_mkl_handle(A); 811 } 812 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 813 if (!b->sparse_optimized || b->state != state) { 814 MatSeqAIJMKL_create_mkl_handle(B); 815 } 816 csrA = a->csrA; 817 csrB = b->csrA; 818 819 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 820 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 821 822 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 823 824 PetscFunctionReturn(0); 825 } 826 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 827 828 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 829 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 830 { 831 Mat_SeqAIJMKL *a, *p, *c; 832 sparse_matrix_t csrA, csrP, csrC; 833 PetscBool set, flag; 834 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 835 struct matrix_descr descr_type_gen; 836 PetscObjectState state; 837 PetscErrorCode ierr; 838 839 PetscFunctionBegin; 840 ierr = MatIsSymmetricKnown(A,&set,&flag); 841 if (!set || (set && !flag)) { 842 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 843 PetscFunctionReturn(0); 844 } 845 846 a = (Mat_SeqAIJMKL*)A->spptr; 847 p = (Mat_SeqAIJMKL*)P->spptr; 848 c = (Mat_SeqAIJMKL*)C->spptr; 849 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 850 if (!a->sparse_optimized || a->state != state) { 851 MatSeqAIJMKL_create_mkl_handle(A); 852 } 853 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 854 if (!p->sparse_optimized || p->state != state) { 855 MatSeqAIJMKL_create_mkl_handle(P); 856 } 857 csrA = a->csrA; 858 csrP = p->csrA; 859 csrC = c->csrA; 860 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 861 862 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 863 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT); 864 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 865 866 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 867 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 868 869 PetscFunctionReturn(0); 870 } 871 #endif 872 873 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 874 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 875 { 876 Mat_SeqAIJMKL *a, *p; 877 sparse_matrix_t csrA, csrP, csrC; 878 PetscBool set, flag; 879 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 880 struct matrix_descr descr_type_gen; 881 PetscObjectState state; 882 PetscErrorCode ierr; 883 884 PetscFunctionBegin; 885 ierr = MatIsSymmetricKnown(A,&set,&flag); 886 if (!set || (set && !flag)) { 887 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 if (scall == MAT_REUSE_MATRIX) { 892 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 a = (Mat_SeqAIJMKL*)A->spptr; 897 p = (Mat_SeqAIJMKL*)P->spptr; 898 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 899 if (!a->sparse_optimized || a->state != state) { 900 MatSeqAIJMKL_create_mkl_handle(A); 901 } 902 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 903 if (!p->sparse_optimized || p->state != state) { 904 MatSeqAIJMKL_create_mkl_handle(P); 905 } 906 csrA = a->csrA; 907 csrP = p->csrA; 908 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 909 910 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 911 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT); 912 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 913 914 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 915 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 916 917 PetscFunctionReturn(0); 918 } 919 #endif 920 921 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 922 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 923 * routine, but can also be used to convert an assembled SeqAIJ matrix 924 * into a SeqAIJMKL one. */ 925 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 926 { 927 PetscErrorCode ierr; 928 Mat B = *newmat; 929 Mat_SeqAIJMKL *aijmkl; 930 PetscBool set; 931 PetscBool sametype; 932 933 PetscFunctionBegin; 934 if (reuse == MAT_INITIAL_MATRIX) { 935 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 936 } 937 938 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 939 if (sametype) PetscFunctionReturn(0); 940 941 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 942 B->spptr = (void*) aijmkl; 943 944 /* Set function pointers for methods that we inherit from AIJ but override. 945 * We also parse some command line options below, since those determine some of the methods we point to. */ 946 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 947 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 948 B->ops->destroy = MatDestroy_SeqAIJMKL; 949 950 aijmkl->sparse_optimized = PETSC_FALSE; 951 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 952 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 953 #else 954 aijmkl->no_SpMV2 = PETSC_TRUE; 955 #endif 956 aijmkl->eager_inspection = PETSC_FALSE; 957 958 /* Parse command line options. */ 959 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 960 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 961 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 962 ierr = PetscOptionsEnd();CHKERRQ(ierr); 963 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 964 if(!aijmkl->no_SpMV2) { 965 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 966 aijmkl->no_SpMV2 = PETSC_TRUE; 967 } 968 #endif 969 970 if(!aijmkl->no_SpMV2) { 971 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 972 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 973 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 974 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 975 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 976 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 977 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 978 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 979 #ifndef PETSC_USE_COMPLEX 980 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 981 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 982 #endif 983 #endif 984 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 985 #endif 986 } else { 987 B->ops->mult = MatMult_SeqAIJMKL; 988 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 989 B->ops->multadd = MatMultAdd_SeqAIJMKL; 990 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 991 } 992 993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 997 if(!aijmkl->no_SpMV2) { 998 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1000 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 1001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1002 #endif 1003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1004 #endif 1005 } 1006 1007 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1008 *newmat = B; 1009 PetscFunctionReturn(0); 1010 } 1011 1012 /*@C 1013 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1014 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1015 routines from Intel MKL whenever possible. 1016 If the installed version of MKL supports the "SpMV2" sparse 1017 inspector-executor routines, then those are used by default. 1018 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1019 symmetric A) operations are currently supported. 1020 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1021 1022 Collective on MPI_Comm 1023 1024 Input Parameters: 1025 + comm - MPI communicator, set to PETSC_COMM_SELF 1026 . m - number of rows 1027 . n - number of columns 1028 . nz - number of nonzeros per row (same for all rows) 1029 - nnz - array containing the number of nonzeros in the various rows 1030 (possibly different for each row) or NULL 1031 1032 Output Parameter: 1033 . A - the matrix 1034 1035 Options Database Keys: 1036 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1037 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1038 1039 Notes: 1040 If nnz is given then nz is ignored 1041 1042 Level: intermediate 1043 1044 .keywords: matrix, MKL, sparse, parallel 1045 1046 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1047 @*/ 1048 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1049 { 1050 PetscErrorCode ierr; 1051 1052 PetscFunctionBegin; 1053 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1054 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1055 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1056 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1057 PetscFunctionReturn(0); 1058 } 1059 1060 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1061 { 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1066 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } 1069