1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 53 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 55 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #if defined(PETSC_HAVE_HYPRE) 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif 64 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 65 if(!aijmkl->no_SpMV2) { 66 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 67 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 68 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 69 #endif 70 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 71 } 72 73 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 74 * simply involves destroying the MKL sparse matrix handle and then freeing 75 * the spptr pointer. */ 76 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 77 78 if (aijmkl->sparse_optimized) { 79 sparse_status_t stat; 80 stat = mkl_sparse_destroy(aijmkl->csrA); 81 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 82 } 83 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 84 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 85 86 /* Change the type of B to MATSEQAIJ. */ 87 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 88 89 *newmat = B; 90 PetscFunctionReturn(0); 91 } 92 93 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 94 { 95 PetscErrorCode ierr; 96 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 97 98 PetscFunctionBegin; 99 100 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 101 * spptr pointer. */ 102 if (aijmkl) { 103 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 104 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 105 if (aijmkl->sparse_optimized) { 106 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 107 stat = mkl_sparse_destroy(aijmkl->csrA); 108 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 109 } 110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 111 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 112 } 113 114 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 115 * to destroy everything that remains. */ 116 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 117 /* Note that I don't call MatSetType(). I believe this is because that 118 * is only to be called when *building* a matrix. I could be wrong, but 119 * that is how things work for the SuperLU matrix class. */ 120 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 125 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 126 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 127 * handle, creates a new one, and then calls mkl_sparse_optimize(). 128 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 129 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 130 * an unoptimized matrix handle here. */ 131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 132 { 133 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 134 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 135 * does nothing. We make it callable anyway in this case because it cuts 136 * down on littering the code with #ifdefs. */ 137 PetscFunctionBegin; 138 PetscFunctionReturn(0); 139 #else 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 141 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 142 PetscInt m,n; 143 MatScalar *aa; 144 PetscInt *aj,*ai; 145 sparse_status_t stat; 146 PetscErrorCode ierr; 147 148 PetscFunctionBegin; 149 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 150 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 151 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 152 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 153 * mkl_sparse_optimize() later. */ 154 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 155 #endif 156 157 if (aijmkl->sparse_optimized) { 158 /* Matrix has been previously assembled and optimized. Must destroy old 159 * matrix handle before running the optimization step again. */ 160 stat = mkl_sparse_destroy(aijmkl->csrA); 161 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 162 } 163 aijmkl->sparse_optimized = PETSC_FALSE; 164 165 /* Now perform the SpMV2 setup and matrix optimization. */ 166 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 167 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 168 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 169 m = A->rmap->n; 170 n = A->cmap->n; 171 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 172 aa = a->a; /* Nonzero elements stored row-by-row. */ 173 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 174 if ((a->nz!=0) & !(A->structure_only)) { 175 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 176 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 177 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 179 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 180 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 181 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 182 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 183 if (!aijmkl->no_SpMV2) { 184 stat = mkl_sparse_optimize(aijmkl->csrA); 185 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 186 } 187 aijmkl->sparse_optimized = PETSC_TRUE; 188 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 189 } 190 191 PetscFunctionReturn(0); 192 #endif 193 } 194 195 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 196 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 197 * matrix handle. 198 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 199 * there is no good alternative. */ 200 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 201 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 202 { 203 PetscErrorCode ierr; 204 sparse_status_t stat; 205 sparse_index_base_t indexing; 206 PetscInt nrows, ncols; 207 PetscInt *aj,*ai,*dummy; 208 MatScalar *aa; 209 Mat A; 210 Mat_SeqAIJMKL *aijmkl; 211 212 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 213 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 214 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 215 216 if (reuse == MAT_REUSE_MATRIX) { 217 ierr = MatDestroy(mat);CHKERRQ(ierr); 218 } 219 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 220 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 221 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 222 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 223 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 224 * they will be destroyed when the MKL handle is destroyed. 225 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 226 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 227 228 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 229 * Now turn it into a MATSEQAIJMKL. */ 230 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 231 232 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 233 aijmkl->csrA = csrA; 234 235 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 236 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 237 * and just need to be able to run the MKL optimization step. */ 238 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 239 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 240 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 241 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 242 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 243 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 244 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 245 if (!aijmkl->no_SpMV2) { 246 stat = mkl_sparse_optimize(aijmkl->csrA); 247 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 248 } 249 aijmkl->sparse_optimized = PETSC_TRUE; 250 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 251 252 *mat = A; 253 PetscFunctionReturn(0); 254 } 255 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 256 257 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 258 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 259 * MatMatMultNumeric(). */ 260 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 261 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 262 { 263 PetscInt i; 264 PetscInt nrows,ncols; 265 PetscInt nz; 266 PetscInt *ai,*aj,*dummy; 267 PetscScalar *aa; 268 PetscErrorCode ierr; 269 Mat_SeqAIJMKL *aijmkl; 270 sparse_status_t stat; 271 sparse_index_base_t indexing; 272 273 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 274 275 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 276 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 277 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 278 279 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 280 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 281 for (i=0; i<nrows; i++) { 282 nz = ai[i+1] - ai[i]; 283 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 284 } 285 286 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 289 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 290 /* We mark our matrix as having a valid, optimized MKL handle. 291 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 292 aijmkl->sparse_optimized = PETSC_TRUE; 293 294 PetscFunctionReturn(0); 295 } 296 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 297 298 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 299 { 300 PetscErrorCode ierr; 301 Mat_SeqAIJMKL *aijmkl; 302 Mat_SeqAIJMKL *aijmkl_dest; 303 304 PetscFunctionBegin; 305 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 306 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 307 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 308 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 309 aijmkl_dest->sparse_optimized = PETSC_FALSE; 310 if (aijmkl->eager_inspection) { 311 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 312 } 313 PetscFunctionReturn(0); 314 } 315 316 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 317 { 318 PetscErrorCode ierr; 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 320 Mat_SeqAIJMKL *aijmkl; 321 322 PetscFunctionBegin; 323 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 324 325 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 326 * extra information and some different methods, call the AssemblyEnd 327 * routine for a MATSEQAIJ. 328 * I'm not sure if this is the best way to do this, but it avoids 329 * a lot of code duplication. */ 330 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 331 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 332 333 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 334 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 335 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 336 if (aijmkl->eager_inspection) { 337 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 338 } 339 340 PetscFunctionReturn(0); 341 } 342 343 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 344 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 345 { 346 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 347 const PetscScalar *x; 348 PetscScalar *y; 349 const MatScalar *aa; 350 PetscErrorCode ierr; 351 PetscInt m=A->rmap->n; 352 PetscInt n=A->cmap->n; 353 PetscScalar alpha = 1.0; 354 PetscScalar beta = 0.0; 355 const PetscInt *aj,*ai; 356 char matdescra[6]; 357 358 359 /* Variables not in MatMult_SeqAIJ. */ 360 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 361 362 PetscFunctionBegin; 363 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 364 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 365 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 366 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 367 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 368 aa = a->a; /* Nonzero elements stored row-by-row. */ 369 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 370 371 /* Call MKL sparse BLAS routine to do the MatMult. */ 372 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 373 374 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 375 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 376 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 #endif 380 381 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 382 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 383 { 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 385 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 386 const PetscScalar *x; 387 PetscScalar *y; 388 PetscErrorCode ierr; 389 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 390 PetscObjectState state; 391 392 PetscFunctionBegin; 393 394 /* If there are no nonzero entries, zero yy and return immediately. */ 395 if(!a->nz) { 396 PetscInt i; 397 PetscInt m=A->rmap->n; 398 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 399 for (i=0; i<m; i++) { 400 y[i] = 0.0; 401 } 402 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 403 PetscFunctionReturn(0); 404 } 405 406 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 407 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 408 409 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 410 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 411 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 412 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 413 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 414 MatSeqAIJMKL_create_mkl_handle(A); 415 } 416 417 /* Call MKL SpMV2 executor routine to do the MatMult. */ 418 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 419 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 420 421 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 423 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 427 428 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 429 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 430 { 431 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 432 const PetscScalar *x; 433 PetscScalar *y; 434 const MatScalar *aa; 435 PetscErrorCode ierr; 436 PetscInt m=A->rmap->n; 437 PetscInt n=A->cmap->n; 438 PetscScalar alpha = 1.0; 439 PetscScalar beta = 0.0; 440 const PetscInt *aj,*ai; 441 char matdescra[6]; 442 443 /* Variables not in MatMultTranspose_SeqAIJ. */ 444 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 445 446 PetscFunctionBegin; 447 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 448 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 449 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 450 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 451 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 452 aa = a->a; /* Nonzero elements stored row-by-row. */ 453 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 454 455 /* Call MKL sparse BLAS routine to do the MatMult. */ 456 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 457 458 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 459 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 460 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 #endif 464 465 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 466 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 467 { 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 469 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 470 const PetscScalar *x; 471 PetscScalar *y; 472 PetscErrorCode ierr; 473 sparse_status_t stat; 474 PetscObjectState state; 475 476 PetscFunctionBegin; 477 478 /* If there are no nonzero entries, zero yy and return immediately. */ 479 if(!a->nz) { 480 PetscInt i; 481 PetscInt n=A->cmap->n; 482 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 483 for (i=0; i<n; i++) { 484 y[i] = 0.0; 485 } 486 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 487 PetscFunctionReturn(0); 488 } 489 490 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 491 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 492 493 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 494 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 495 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 496 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 497 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 498 MatSeqAIJMKL_create_mkl_handle(A); 499 } 500 501 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 502 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 503 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 504 505 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 507 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 508 PetscFunctionReturn(0); 509 } 510 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 511 512 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 513 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 514 { 515 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 516 const PetscScalar *x; 517 PetscScalar *y,*z; 518 const MatScalar *aa; 519 PetscErrorCode ierr; 520 PetscInt m=A->rmap->n; 521 PetscInt n=A->cmap->n; 522 const PetscInt *aj,*ai; 523 PetscInt i; 524 525 /* Variables not in MatMultAdd_SeqAIJ. */ 526 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 527 PetscScalar alpha = 1.0; 528 PetscScalar beta; 529 char matdescra[6]; 530 531 PetscFunctionBegin; 532 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 533 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 534 535 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 536 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 537 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 538 aa = a->a; /* Nonzero elements stored row-by-row. */ 539 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 540 541 /* Call MKL sparse BLAS routine to do the MatMult. */ 542 if (zz == yy) { 543 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 544 beta = 1.0; 545 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 546 } else { 547 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 548 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 549 beta = 0.0; 550 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 551 for (i=0; i<m; i++) { 552 z[i] += y[i]; 553 } 554 } 555 556 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 557 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 558 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561 #endif 562 563 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 564 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 565 { 566 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 567 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 568 const PetscScalar *x; 569 PetscScalar *y,*z; 570 PetscErrorCode ierr; 571 PetscInt m=A->rmap->n; 572 PetscInt i; 573 574 /* Variables not in MatMultAdd_SeqAIJ. */ 575 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 576 PetscObjectState state; 577 578 PetscFunctionBegin; 579 580 /* If there are no nonzero entries, set zz = yy and return immediately. */ 581 if(!a->nz) { 582 PetscInt i; 583 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 584 for (i=0; i<m; i++) { 585 z[i] = y[i]; 586 } 587 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 592 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 593 594 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 595 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 596 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 597 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 598 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 599 MatSeqAIJMKL_create_mkl_handle(A); 600 } 601 602 /* Call MKL sparse BLAS routine to do the MatMult. */ 603 if (zz == yy) { 604 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 605 * with alpha and beta both set to 1.0. */ 606 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 607 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 608 } else { 609 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 610 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 611 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 612 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 613 for (i=0; i<m; i++) { 614 z[i] += y[i]; 615 } 616 } 617 618 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 619 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 620 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 624 625 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 626 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 627 { 628 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 629 const PetscScalar *x; 630 PetscScalar *y,*z; 631 const MatScalar *aa; 632 PetscErrorCode ierr; 633 PetscInt m=A->rmap->n; 634 PetscInt n=A->cmap->n; 635 const PetscInt *aj,*ai; 636 PetscInt i; 637 638 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 639 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 640 PetscScalar alpha = 1.0; 641 PetscScalar beta; 642 char matdescra[6]; 643 644 PetscFunctionBegin; 645 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 646 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 647 648 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 649 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 650 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 651 aa = a->a; /* Nonzero elements stored row-by-row. */ 652 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 653 654 /* Call MKL sparse BLAS routine to do the MatMult. */ 655 if (zz == yy) { 656 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 657 beta = 1.0; 658 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 659 } else { 660 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 661 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 662 beta = 0.0; 663 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 664 for (i=0; i<n; i++) { 665 z[i] += y[i]; 666 } 667 } 668 669 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 670 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 671 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 #endif 675 676 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 677 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 678 { 679 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 680 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 681 const PetscScalar *x; 682 PetscScalar *y,*z; 683 PetscErrorCode ierr; 684 PetscInt n=A->cmap->n; 685 PetscInt i; 686 PetscObjectState state; 687 688 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 689 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 690 691 PetscFunctionBegin; 692 693 /* If there are no nonzero entries, set zz = yy and return immediately. */ 694 if(!a->nz) { 695 PetscInt i; 696 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 697 for (i=0; i<n; i++) { 698 z[i] = y[i]; 699 } 700 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 705 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 706 707 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 708 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 709 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 710 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 711 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 712 MatSeqAIJMKL_create_mkl_handle(A); 713 } 714 715 /* Call MKL sparse BLAS routine to do the MatMult. */ 716 if (zz == yy) { 717 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 718 * with alpha and beta both set to 1.0. */ 719 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 720 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 721 } else { 722 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 723 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 724 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 725 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 726 for (i=0; i<n; i++) { 727 z[i] += y[i]; 728 } 729 } 730 731 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 732 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 733 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 737 738 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 739 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 740 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 741 * For releases of MKL prior to version 18, update 2: 742 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 743 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 744 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 745 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */ 746 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 747 { 748 Mat_SeqAIJMKL *a, *b; 749 sparse_matrix_t csrA, csrB, csrC; 750 PetscErrorCode ierr; 751 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 752 PetscObjectState state; 753 754 PetscFunctionBegin; 755 a = (Mat_SeqAIJMKL*)A->spptr; 756 b = (Mat_SeqAIJMKL*)B->spptr; 757 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 758 if (!a->sparse_optimized || a->state != state) { 759 MatSeqAIJMKL_create_mkl_handle(A); 760 } 761 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 762 if (!b->sparse_optimized || b->state != state) { 763 MatSeqAIJMKL_create_mkl_handle(B); 764 } 765 csrA = a->csrA; 766 csrB = b->csrA; 767 768 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 769 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 770 771 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 772 773 PetscFunctionReturn(0); 774 } 775 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 776 777 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 778 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 779 { 780 Mat_SeqAIJMKL *a, *b, *c; 781 sparse_matrix_t csrA, csrB, csrC; 782 PetscErrorCode ierr; 783 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 784 struct matrix_descr descr_type_gen; 785 PetscObjectState state; 786 787 PetscFunctionBegin; 788 a = (Mat_SeqAIJMKL*)A->spptr; 789 b = (Mat_SeqAIJMKL*)B->spptr; 790 c = (Mat_SeqAIJMKL*)C->spptr; 791 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 792 if (!a->sparse_optimized || a->state != state) { 793 MatSeqAIJMKL_create_mkl_handle(A); 794 } 795 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 796 if (!b->sparse_optimized || b->state != state) { 797 MatSeqAIJMKL_create_mkl_handle(B); 798 } 799 csrA = a->csrA; 800 csrB = b->csrA; 801 csrC = c->csrA; 802 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 803 804 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 805 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 806 SPARSE_STAGE_FINALIZE_MULT,&csrC); 807 808 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 809 810 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 811 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 812 813 PetscFunctionReturn(0); 814 } 815 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 816 817 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 818 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 819 { 820 Mat_SeqAIJMKL *a, *b; 821 sparse_matrix_t csrA, csrB, csrC; 822 PetscErrorCode ierr; 823 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 824 PetscObjectState state; 825 826 PetscFunctionBegin; 827 a = (Mat_SeqAIJMKL*)A->spptr; 828 b = (Mat_SeqAIJMKL*)B->spptr; 829 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 830 if (!a->sparse_optimized || a->state != state) { 831 MatSeqAIJMKL_create_mkl_handle(A); 832 } 833 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 834 if (!b->sparse_optimized || b->state != state) { 835 MatSeqAIJMKL_create_mkl_handle(B); 836 } 837 csrA = a->csrA; 838 csrB = b->csrA; 839 840 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 841 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 842 843 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 844 845 PetscFunctionReturn(0); 846 } 847 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 848 849 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 850 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 851 { 852 Mat_SeqAIJMKL *a, *p, *c; 853 sparse_matrix_t csrA, csrP, csrC; 854 PetscBool set, flag; 855 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 856 struct matrix_descr descr_type_sym; 857 PetscObjectState state; 858 PetscErrorCode ierr; 859 860 PetscFunctionBegin; 861 ierr = MatIsSymmetricKnown(A,&set,&flag); 862 if (!set || (set && !flag)) { 863 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 a = (Mat_SeqAIJMKL*)A->spptr; 868 p = (Mat_SeqAIJMKL*)P->spptr; 869 c = (Mat_SeqAIJMKL*)C->spptr; 870 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 871 if (!a->sparse_optimized || a->state != state) { 872 MatSeqAIJMKL_create_mkl_handle(A); 873 } 874 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 875 if (!p->sparse_optimized || p->state != state) { 876 MatSeqAIJMKL_create_mkl_handle(P); 877 } 878 csrA = a->csrA; 879 csrP = p->csrA; 880 csrC = c->csrA; 881 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 882 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 883 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 884 885 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 886 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 887 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 888 889 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 890 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 891 892 PetscFunctionReturn(0); 893 } 894 #endif 895 896 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 897 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 898 { 899 Mat_SeqAIJMKL *a, *p; 900 sparse_matrix_t csrA, csrP, csrC; 901 PetscBool set, flag; 902 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 903 struct matrix_descr descr_type_sym; 904 PetscObjectState state; 905 PetscErrorCode ierr; 906 907 PetscFunctionBegin; 908 ierr = MatIsSymmetricKnown(A,&set,&flag); 909 if (!set || (set && !flag)) { 910 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 if (scall == MAT_REUSE_MATRIX) { 915 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 a = (Mat_SeqAIJMKL*)A->spptr; 920 p = (Mat_SeqAIJMKL*)P->spptr; 921 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 922 if (!a->sparse_optimized || a->state != state) { 923 MatSeqAIJMKL_create_mkl_handle(A); 924 } 925 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 926 if (!p->sparse_optimized || p->state != state) { 927 MatSeqAIJMKL_create_mkl_handle(P); 928 } 929 csrA = a->csrA; 930 csrP = p->csrA; 931 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 932 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 933 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 934 935 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 936 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT); 937 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 938 939 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 940 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 941 942 PetscFunctionReturn(0); 943 } 944 #endif 945 946 /* These function prototypes are needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */ 947 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 948 #if defined(PETSC_HAVE_HYPRE) 949 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 950 #endif 951 952 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 953 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 954 * routine, but can also be used to convert an assembled SeqAIJ matrix 955 * into a SeqAIJMKL one. */ 956 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 957 { 958 PetscErrorCode ierr; 959 Mat B = *newmat; 960 Mat_SeqAIJMKL *aijmkl; 961 PetscBool set; 962 PetscBool sametype; 963 964 PetscFunctionBegin; 965 if (reuse == MAT_INITIAL_MATRIX) { 966 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 967 } 968 969 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 970 if (sametype) PetscFunctionReturn(0); 971 972 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 973 B->spptr = (void*) aijmkl; 974 975 /* Set function pointers for methods that we inherit from AIJ but override. 976 * We also parse some command line options below, since those determine some of the methods we point to. */ 977 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 978 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 979 B->ops->destroy = MatDestroy_SeqAIJMKL; 980 981 aijmkl->sparse_optimized = PETSC_FALSE; 982 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 983 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 984 #else 985 aijmkl->no_SpMV2 = PETSC_TRUE; 986 #endif 987 aijmkl->eager_inspection = PETSC_FALSE; 988 989 /* Parse command line options. */ 990 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 991 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 992 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 994 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 995 if(!aijmkl->no_SpMV2) { 996 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 997 aijmkl->no_SpMV2 = PETSC_TRUE; 998 } 999 #endif 1000 1001 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1002 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1003 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1004 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1005 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1006 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1007 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1008 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1009 # if !defined(PETSC_USE_COMPLEX) 1010 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 1011 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1012 # endif 1013 # endif 1014 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1015 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1016 1017 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1018 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1019 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1020 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1021 * versions in which the older interface has not been deprecated, we use the old interface. */ 1022 if (aijmkl->no_SpMV2) { 1023 B->ops->mult = MatMult_SeqAIJMKL; 1024 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1025 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1026 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1027 } 1028 #endif 1029 1030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 1035 #if defined(PETSC_HAVE_HYPRE) 1036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaijmkl_seqaijmkl_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 1037 #endif 1038 if(!aijmkl->no_SpMV2) { 1039 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1041 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1043 #endif 1044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1045 #endif 1046 } 1047 1048 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1049 *newmat = B; 1050 PetscFunctionReturn(0); 1051 } 1052 1053 /*@C 1054 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1055 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1056 routines from Intel MKL whenever possible. 1057 If the installed version of MKL supports the "SpMV2" sparse 1058 inspector-executor routines, then those are used by default. 1059 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1060 symmetric A) operations are currently supported. 1061 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1062 1063 Collective 1064 1065 Input Parameters: 1066 + comm - MPI communicator, set to PETSC_COMM_SELF 1067 . m - number of rows 1068 . n - number of columns 1069 . nz - number of nonzeros per row (same for all rows) 1070 - nnz - array containing the number of nonzeros in the various rows 1071 (possibly different for each row) or NULL 1072 1073 Output Parameter: 1074 . A - the matrix 1075 1076 Options Database Keys: 1077 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1078 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1079 1080 Notes: 1081 If nnz is given then nz is ignored 1082 1083 Level: intermediate 1084 1085 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1086 @*/ 1087 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1088 { 1089 PetscErrorCode ierr; 1090 1091 PetscFunctionBegin; 1092 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1093 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1094 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1095 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1100 { 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1105 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108