1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 18 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 19 PetscObjectState state; 20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 21 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 22 struct matrix_descr descr; 23 #endif 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 35 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 62 if(!aijmkl->no_SpMV2) { 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 65 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66 #endif 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 } 69 70 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 71 * simply involves destroying the MKL sparse matrix handle and then freeing 72 * the spptr pointer. */ 73 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 74 75 if (aijmkl->sparse_optimized) { 76 sparse_status_t stat; 77 stat = mkl_sparse_destroy(aijmkl->csrA); 78 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 79 } 80 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 82 83 /* Change the type of B to MATSEQAIJ. */ 84 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 85 86 *newmat = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 94 95 PetscFunctionBegin; 96 97 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98 * spptr pointer. */ 99 if (aijmkl) { 100 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 101 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 102 if (aijmkl->sparse_optimized) { 103 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 104 stat = mkl_sparse_destroy(aijmkl->csrA); 105 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 106 } 107 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 108 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 109 } 110 111 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 112 * to destroy everything that remains. */ 113 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 114 /* Note that I don't call MatSetType(). I believe this is because that 115 * is only to be called when *building* a matrix. I could be wrong, but 116 * that is how things work for the SuperLU matrix class. */ 117 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 122 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 123 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 124 * handle, creates a new one, and then calls mkl_sparse_optimize(). 125 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 126 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 127 * an unoptimized matrix handle here. */ 128 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 129 { 130 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 131 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 132 * does nothing. We make it callable anyway in this case because it cuts 133 * down on littering the code with #ifdefs. */ 134 PetscFunctionBegin; 135 PetscFunctionReturn(0); 136 #else 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 138 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 139 PetscInt m,n; 140 MatScalar *aa; 141 PetscInt *aj,*ai; 142 sparse_status_t stat; 143 PetscErrorCode ierr; 144 145 PetscFunctionBegin; 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 148 if (aijmkl->sparse_optimized) { 149 /* Matrix has been previously assembled and optimized. Must destroy old 150 * matrix handle before running the optimization step again. */ 151 stat = mkl_sparse_destroy(aijmkl->csrA); 152 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 153 } 154 aijmkl->sparse_optimized = PETSC_FALSE; 155 156 /* Now perform the SpMV2 setup and matrix optimization. */ 157 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 158 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 159 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 160 m = A->rmap->n; 161 n = A->cmap->n; 162 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 163 aa = a->a; /* Nonzero elements stored row-by-row. */ 164 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 165 if ((a->nz!=0) & !(A->structure_only)) { 166 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 167 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 168 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 169 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 170 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 172 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 174 stat = mkl_sparse_optimize(aijmkl->csrA); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 176 aijmkl->sparse_optimized = PETSC_TRUE; 177 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 178 } 179 180 PetscFunctionReturn(0); 181 #endif 182 } 183 184 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 185 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 186 * matrix handle. 187 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 188 * there is no good alternative. */ 189 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 190 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 191 { 192 PetscErrorCode ierr; 193 sparse_status_t stat; 194 sparse_index_base_t indexing; 195 PetscInt nrows, ncols; 196 PetscInt *aj,*ai,*dummy; 197 MatScalar *aa; 198 Mat A; 199 Mat_SeqAIJMKL *aijmkl; 200 201 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 202 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 203 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 204 205 if (reuse == MAT_REUSE_MATRIX) { 206 ierr = MatDestroy(mat);CHKERRQ(ierr); 207 } 208 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 209 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 210 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 211 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 212 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 213 * they will be destroyed when the MKL handle is destroyed. 214 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 215 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 216 217 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 218 * Now turn it into a MATSEQAIJMKL. */ 219 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 220 221 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 222 aijmkl->csrA = csrA; 223 224 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 225 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 226 * and just need to be able to run the MKL optimization step. */ 227 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 228 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 229 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 230 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 231 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 232 stat = mkl_sparse_optimize(aijmkl->csrA); 233 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 234 aijmkl->sparse_optimized = PETSC_TRUE; 235 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 236 237 *mat = A; 238 PetscFunctionReturn(0); 239 } 240 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 241 242 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 243 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 244 * MatMatMultNumeric(). */ 245 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 246 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 247 { 248 PetscInt i; 249 PetscInt nrows,ncols; 250 PetscInt nz; 251 PetscInt *ai,*aj,*dummy; 252 PetscScalar *aa; 253 PetscErrorCode ierr; 254 Mat_SeqAIJMKL *aijmkl; 255 sparse_status_t stat; 256 sparse_index_base_t indexing; 257 258 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 259 260 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 261 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 262 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 263 264 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 265 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 266 for (i=0; i<nrows; i++) { 267 nz = ai[i+1] - ai[i]; 268 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 269 } 270 271 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 274 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 275 /* We mark our matrix as having a valid, optimized MKL handle. 276 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 277 aijmkl->sparse_optimized = PETSC_TRUE; 278 279 PetscFunctionReturn(0); 280 } 281 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 282 283 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 284 { 285 PetscErrorCode ierr; 286 Mat_SeqAIJMKL *aijmkl; 287 Mat_SeqAIJMKL *aijmkl_dest; 288 289 PetscFunctionBegin; 290 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 291 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 292 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 293 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 294 aijmkl_dest->sparse_optimized = PETSC_FALSE; 295 if (aijmkl->eager_inspection) { 296 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 297 } 298 PetscFunctionReturn(0); 299 } 300 301 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 302 { 303 PetscErrorCode ierr; 304 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 305 Mat_SeqAIJMKL *aijmkl; 306 307 PetscFunctionBegin; 308 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 309 310 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 311 * extra information and some different methods, call the AssemblyEnd 312 * routine for a MATSEQAIJ. 313 * I'm not sure if this is the best way to do this, but it avoids 314 * a lot of code duplication. */ 315 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 316 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 317 318 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 319 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 320 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 321 if (aijmkl->eager_inspection) { 322 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 323 } 324 325 PetscFunctionReturn(0); 326 } 327 328 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 329 { 330 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 331 const PetscScalar *x; 332 PetscScalar *y; 333 const MatScalar *aa; 334 PetscErrorCode ierr; 335 PetscInt m=A->rmap->n; 336 PetscInt n=A->cmap->n; 337 PetscScalar alpha = 1.0; 338 PetscScalar beta = 0.0; 339 const PetscInt *aj,*ai; 340 char matdescra[6]; 341 342 343 /* Variables not in MatMult_SeqAIJ. */ 344 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 345 346 PetscFunctionBegin; 347 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 348 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 349 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 350 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 351 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 352 aa = a->a; /* Nonzero elements stored row-by-row. */ 353 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 354 355 /* Call MKL sparse BLAS routine to do the MatMult. */ 356 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 357 358 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 360 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 365 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 366 { 367 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 368 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 369 const PetscScalar *x; 370 PetscScalar *y; 371 PetscErrorCode ierr; 372 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 373 PetscObjectState state; 374 375 PetscFunctionBegin; 376 377 /* If there are no nonzero entries, zero yy and return immediately. */ 378 if(!a->nz) { 379 PetscInt i; 380 PetscInt m=A->rmap->n; 381 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 382 for (i=0; i<m; i++) { 383 y[i] = 0.0; 384 } 385 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 390 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 391 392 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 393 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 394 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 395 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 396 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 397 MatSeqAIJMKL_create_mkl_handle(A); 398 } 399 400 /* Call MKL SpMV2 executor routine to do the MatMult. */ 401 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 402 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 403 404 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 406 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 410 411 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 412 { 413 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 414 const PetscScalar *x; 415 PetscScalar *y; 416 const MatScalar *aa; 417 PetscErrorCode ierr; 418 PetscInt m=A->rmap->n; 419 PetscInt n=A->cmap->n; 420 PetscScalar alpha = 1.0; 421 PetscScalar beta = 0.0; 422 const PetscInt *aj,*ai; 423 char matdescra[6]; 424 425 /* Variables not in MatMultTranspose_SeqAIJ. */ 426 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 427 428 PetscFunctionBegin; 429 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 430 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 431 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 432 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 433 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 434 aa = a->a; /* Nonzero elements stored row-by-row. */ 435 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 436 437 /* Call MKL sparse BLAS routine to do the MatMult. */ 438 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 439 440 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 447 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 448 { 449 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 450 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 451 const PetscScalar *x; 452 PetscScalar *y; 453 PetscErrorCode ierr; 454 sparse_status_t stat; 455 PetscObjectState state; 456 457 PetscFunctionBegin; 458 459 /* If there are no nonzero entries, zero yy and return immediately. */ 460 if(!a->nz) { 461 PetscInt i; 462 PetscInt n=A->cmap->n; 463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 464 for (i=0; i<n; i++) { 465 y[i] = 0.0; 466 } 467 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 472 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 473 474 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 475 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 476 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 477 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 478 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 479 MatSeqAIJMKL_create_mkl_handle(A); 480 } 481 482 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 483 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 484 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 485 486 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 488 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 492 493 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 494 { 495 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 496 const PetscScalar *x; 497 PetscScalar *y,*z; 498 const MatScalar *aa; 499 PetscErrorCode ierr; 500 PetscInt m=A->rmap->n; 501 PetscInt n=A->cmap->n; 502 const PetscInt *aj,*ai; 503 PetscInt i; 504 505 /* Variables not in MatMultAdd_SeqAIJ. */ 506 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 507 PetscScalar alpha = 1.0; 508 PetscScalar beta; 509 char matdescra[6]; 510 511 PetscFunctionBegin; 512 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 513 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 514 515 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 516 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 517 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 518 aa = a->a; /* Nonzero elements stored row-by-row. */ 519 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 520 521 /* Call MKL sparse BLAS routine to do the MatMult. */ 522 if (zz == yy) { 523 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 524 beta = 1.0; 525 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 526 } else { 527 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 528 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 529 beta = 0.0; 530 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 531 for (i=0; i<m; i++) { 532 z[i] += y[i]; 533 } 534 } 535 536 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 537 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 538 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 543 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 544 { 545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 546 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 547 const PetscScalar *x; 548 PetscScalar *y,*z; 549 PetscErrorCode ierr; 550 PetscInt m=A->rmap->n; 551 PetscInt i; 552 553 /* Variables not in MatMultAdd_SeqAIJ. */ 554 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 555 PetscObjectState state; 556 557 PetscFunctionBegin; 558 559 /* If there are no nonzero entries, set zz = yy and return immediately. */ 560 if(!a->nz) { 561 PetscInt i; 562 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 563 for (i=0; i<m; i++) { 564 z[i] = y[i]; 565 } 566 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 571 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 572 573 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 574 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 575 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 576 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 577 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 578 MatSeqAIJMKL_create_mkl_handle(A); 579 } 580 581 /* Call MKL sparse BLAS routine to do the MatMult. */ 582 if (zz == yy) { 583 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 584 * with alpha and beta both set to 1.0. */ 585 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 586 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 587 } else { 588 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 589 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 590 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 591 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 592 for (i=0; i<m; i++) { 593 z[i] += y[i]; 594 } 595 } 596 597 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 598 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 599 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 603 604 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 605 { 606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 607 const PetscScalar *x; 608 PetscScalar *y,*z; 609 const MatScalar *aa; 610 PetscErrorCode ierr; 611 PetscInt m=A->rmap->n; 612 PetscInt n=A->cmap->n; 613 const PetscInt *aj,*ai; 614 PetscInt i; 615 616 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 617 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 618 PetscScalar alpha = 1.0; 619 PetscScalar beta; 620 char matdescra[6]; 621 622 PetscFunctionBegin; 623 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 624 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 625 626 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 627 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 628 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 629 aa = a->a; /* Nonzero elements stored row-by-row. */ 630 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 631 632 /* Call MKL sparse BLAS routine to do the MatMult. */ 633 if (zz == yy) { 634 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 635 beta = 1.0; 636 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 637 } else { 638 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 639 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 640 beta = 0.0; 641 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 642 for (i=0; i<n; i++) { 643 z[i] += y[i]; 644 } 645 } 646 647 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 648 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 649 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 654 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 657 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 658 const PetscScalar *x; 659 PetscScalar *y,*z; 660 PetscErrorCode ierr; 661 PetscInt n=A->cmap->n; 662 PetscInt i; 663 PetscObjectState state; 664 665 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 666 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 667 668 PetscFunctionBegin; 669 670 /* If there are no nonzero entries, set zz = yy and return immediately. */ 671 if(!a->nz) { 672 PetscInt i; 673 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 674 for (i=0; i<n; i++) { 675 z[i] = y[i]; 676 } 677 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 682 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 683 684 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 685 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 686 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 687 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 688 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 689 MatSeqAIJMKL_create_mkl_handle(A); 690 } 691 692 /* Call MKL sparse BLAS routine to do the MatMult. */ 693 if (zz == yy) { 694 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 695 * with alpha and beta both set to 1.0. */ 696 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 697 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 698 } else { 699 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 700 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 701 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 702 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 703 for (i=0; i<n; i++) { 704 z[i] += y[i]; 705 } 706 } 707 708 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 709 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 710 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 714 715 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 716 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 717 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 718 * For releases of MKL prior to version 18, update 2: 719 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 720 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 721 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 722 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 723 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 724 { 725 Mat_SeqAIJMKL *a, *b; 726 sparse_matrix_t csrA, csrB, csrC; 727 PetscErrorCode ierr; 728 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 729 PetscObjectState state; 730 731 PetscFunctionBegin; 732 a = (Mat_SeqAIJMKL*)A->spptr; 733 b = (Mat_SeqAIJMKL*)B->spptr; 734 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 735 if (!a->sparse_optimized || a->state != state) { 736 MatSeqAIJMKL_create_mkl_handle(A); 737 } 738 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 739 if (!b->sparse_optimized || b->state != state) { 740 MatSeqAIJMKL_create_mkl_handle(B); 741 } 742 csrA = a->csrA; 743 csrB = b->csrA; 744 745 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 746 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 747 748 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 749 750 PetscFunctionReturn(0); 751 } 752 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 753 754 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 755 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 756 { 757 Mat_SeqAIJMKL *a, *b, *c; 758 sparse_matrix_t csrA, csrB, csrC; 759 PetscErrorCode ierr; 760 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 761 struct matrix_descr descr_type_gen; 762 PetscObjectState state; 763 764 PetscFunctionBegin; 765 a = (Mat_SeqAIJMKL*)A->spptr; 766 b = (Mat_SeqAIJMKL*)B->spptr; 767 c = (Mat_SeqAIJMKL*)C->spptr; 768 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 769 if (!a->sparse_optimized || a->state != state) { 770 MatSeqAIJMKL_create_mkl_handle(A); 771 } 772 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 773 if (!b->sparse_optimized || b->state != state) { 774 MatSeqAIJMKL_create_mkl_handle(B); 775 } 776 csrA = a->csrA; 777 csrB = b->csrA; 778 csrC = c->csrA; 779 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 780 781 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 782 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 783 SPARSE_STAGE_FINALIZE_MULT,&csrC); 784 785 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 786 787 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 788 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 789 790 PetscFunctionReturn(0); 791 } 792 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */ 793 794 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 795 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 796 { 797 Mat_SeqAIJMKL *a, *b; 798 sparse_matrix_t csrA, csrB, csrC; 799 PetscErrorCode ierr; 800 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 801 PetscObjectState state; 802 803 PetscFunctionBegin; 804 a = (Mat_SeqAIJMKL*)A->spptr; 805 b = (Mat_SeqAIJMKL*)B->spptr; 806 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 807 if (!a->sparse_optimized || a->state != state) { 808 MatSeqAIJMKL_create_mkl_handle(A); 809 } 810 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 811 if (!b->sparse_optimized || b->state != state) { 812 MatSeqAIJMKL_create_mkl_handle(B); 813 } 814 csrA = a->csrA; 815 csrB = b->csrA; 816 817 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 818 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 819 820 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 821 822 PetscFunctionReturn(0); 823 } 824 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 825 826 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 827 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 828 { 829 Mat_SeqAIJMKL *a, *p, *c; 830 sparse_matrix_t csrA, csrP, csrC; 831 PetscBool set, flag; 832 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 833 struct matrix_descr descr_type_gen; 834 PetscObjectState state; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatIsSymmetricKnown(A,&set,&flag); 839 if (!set || (set && !flag)) { 840 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 a = (Mat_SeqAIJMKL*)A->spptr; 845 p = (Mat_SeqAIJMKL*)P->spptr; 846 c = (Mat_SeqAIJMKL*)C->spptr; 847 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 848 if (!a->sparse_optimized || a->state != state) { 849 MatSeqAIJMKL_create_mkl_handle(A); 850 } 851 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 852 if (!p->sparse_optimized || p->state != state) { 853 MatSeqAIJMKL_create_mkl_handle(P); 854 } 855 csrA = a->csrA; 856 csrP = p->csrA; 857 csrC = c->csrA; 858 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 859 860 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 861 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT); 862 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 863 864 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 865 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 866 867 PetscFunctionReturn(0); 868 } 869 #endif 870 871 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 872 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 873 { 874 Mat_SeqAIJMKL *a, *p; 875 sparse_matrix_t csrA, csrP, csrC; 876 PetscBool set, flag; 877 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 878 struct matrix_descr descr_type_gen; 879 PetscObjectState state; 880 PetscErrorCode ierr; 881 882 PetscFunctionBegin; 883 ierr = MatIsSymmetricKnown(A,&set,&flag); 884 if (!set || (set && !flag)) { 885 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 if (scall == MAT_REUSE_MATRIX) { 890 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 a = (Mat_SeqAIJMKL*)A->spptr; 895 p = (Mat_SeqAIJMKL*)P->spptr; 896 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 897 if (!a->sparse_optimized || a->state != state) { 898 MatSeqAIJMKL_create_mkl_handle(A); 899 } 900 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 901 if (!p->sparse_optimized || p->state != state) { 902 MatSeqAIJMKL_create_mkl_handle(P); 903 } 904 csrA = a->csrA; 905 csrP = p->csrA; 906 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 907 908 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 909 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT); 910 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 911 912 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 913 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 914 915 PetscFunctionReturn(0); 916 } 917 #endif 918 919 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 920 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 921 * routine, but can also be used to convert an assembled SeqAIJ matrix 922 * into a SeqAIJMKL one. */ 923 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 924 { 925 PetscErrorCode ierr; 926 Mat B = *newmat; 927 Mat_SeqAIJMKL *aijmkl; 928 PetscBool set; 929 PetscBool sametype; 930 931 PetscFunctionBegin; 932 if (reuse == MAT_INITIAL_MATRIX) { 933 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 934 } 935 936 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 937 if (sametype) PetscFunctionReturn(0); 938 939 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 940 B->spptr = (void*) aijmkl; 941 942 /* Set function pointers for methods that we inherit from AIJ but override. 943 * We also parse some command line options below, since those determine some of the methods we point to. */ 944 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 945 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 946 B->ops->destroy = MatDestroy_SeqAIJMKL; 947 948 aijmkl->sparse_optimized = PETSC_FALSE; 949 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 950 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 951 #else 952 aijmkl->no_SpMV2 = PETSC_TRUE; 953 #endif 954 aijmkl->eager_inspection = PETSC_FALSE; 955 956 /* Parse command line options. */ 957 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 958 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 959 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 960 ierr = PetscOptionsEnd();CHKERRQ(ierr); 961 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 962 if(!aijmkl->no_SpMV2) { 963 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 964 aijmkl->no_SpMV2 = PETSC_TRUE; 965 } 966 #endif 967 968 if(!aijmkl->no_SpMV2) { 969 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 970 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 971 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 972 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 973 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 974 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 975 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 976 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 977 #ifndef PETSC_USE_COMPLEX 978 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 979 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 980 #endif 981 #endif 982 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 983 #endif 984 } else { 985 B->ops->mult = MatMult_SeqAIJMKL; 986 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 987 B->ops->multadd = MatMultAdd_SeqAIJMKL; 988 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 989 } 990 991 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 995 if(!aijmkl->no_SpMV2) { 996 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 998 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M 999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1000 #endif 1001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1002 #endif 1003 } 1004 1005 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1006 *newmat = B; 1007 PetscFunctionReturn(0); 1008 } 1009 1010 /*@C 1011 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1012 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1013 routines from Intel MKL whenever possible. 1014 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult 1015 operations are currently supported. 1016 If the installed version of MKL supports the "SpMV2" sparse 1017 inspector-executor routines, then those are used by default. 1018 1019 Collective on MPI_Comm 1020 1021 Input Parameters: 1022 + comm - MPI communicator, set to PETSC_COMM_SELF 1023 . m - number of rows 1024 . n - number of columns 1025 . nz - number of nonzeros per row (same for all rows) 1026 - nnz - array containing the number of nonzeros in the various rows 1027 (possibly different for each row) or NULL 1028 1029 Output Parameter: 1030 . A - the matrix 1031 1032 Options Database Keys: 1033 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1034 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1035 1036 Notes: 1037 If nnz is given then nz is ignored 1038 1039 Level: intermediate 1040 1041 .keywords: matrix, MKL, sparse, parallel 1042 1043 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1044 @*/ 1045 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1046 { 1047 PetscErrorCode ierr; 1048 1049 PetscFunctionBegin; 1050 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1051 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1052 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1053 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1058 { 1059 PetscErrorCode ierr; 1060 1061 PetscFunctionBegin; 1062 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1063 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066