1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. 
*/ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 51 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptap = MatPtAP_SeqAIJ_SeqAIJ; 53 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ; 55 56 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 58 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 59 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 60 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr); 61 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 62 if(!aijmkl->no_SpMV2) { 63 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 64 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 65 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 66 #endif 67 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 68 } 69 70 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 71 * simply involves destroying the MKL sparse matrix handle and then freeing 72 * the spptr pointer. 
*/ 73 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 74 75 if (aijmkl->sparse_optimized) { 76 sparse_status_t stat; 77 stat = mkl_sparse_destroy(aijmkl->csrA); 78 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 79 } 80 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 81 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 82 83 /* Change the type of B to MATSEQAIJ. */ 84 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 85 86 *newmat = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 94 95 PetscFunctionBegin; 96 97 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 98 * spptr pointer. */ 99 if (aijmkl) { 100 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 101 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 102 if (aijmkl->sparse_optimized) { 103 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 104 stat = mkl_sparse_destroy(aijmkl->csrA); 105 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 106 } 107 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 108 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 109 } 110 111 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 112 * to destroy everything that remains. */ 113 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 114 /* Note that I don't call MatSetType(). I believe this is because that 115 * is only to be called when *building* a matrix. I could be wrong, but 116 * that is how things work for the SuperLU matrix class. 
*/ 117 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 122 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 123 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 124 * handle, creates a new one, and then calls mkl_sparse_optimize(). 125 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 126 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 127 * an unoptimized matrix handle here. */ 128 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 129 { 130 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 131 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 132 * does nothing. We make it callable anyway in this case because it cuts 133 * down on littering the code with #ifdefs. */ 134 PetscFunctionBegin; 135 PetscFunctionReturn(0); 136 #else 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 138 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 139 PetscInt m,n; 140 MatScalar *aa; 141 PetscInt *aj,*ai; 142 sparse_status_t stat; 143 PetscErrorCode ierr; 144 145 PetscFunctionBegin; 146 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 147 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 148 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 149 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 150 * mkl_sparse_optimize() later. 
*/ 151 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 152 #endif 153 154 if (aijmkl->sparse_optimized) { 155 /* Matrix has been previously assembled and optimized. Must destroy old 156 * matrix handle before running the optimization step again. */ 157 stat = mkl_sparse_destroy(aijmkl->csrA); 158 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 159 } 160 aijmkl->sparse_optimized = PETSC_FALSE; 161 162 /* Now perform the SpMV2 setup and matrix optimization. */ 163 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 164 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 165 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 166 m = A->rmap->n; 167 n = A->cmap->n; 168 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 169 aa = a->a; /* Nonzero elements stored row-by-row. */ 170 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 171 if ((a->nz!=0) & !(A->structure_only)) { 172 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 173 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. 
*/ 174 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 176 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 177 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 178 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 179 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 180 if (!aijmkl->no_SpMV2) { 181 stat = mkl_sparse_optimize(aijmkl->csrA); 182 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 183 } 184 aijmkl->sparse_optimized = PETSC_TRUE; 185 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 186 } 187 188 PetscFunctionReturn(0); 189 #endif 190 } 191 192 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle. 193 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized) 194 * matrix handle. 195 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as 196 * there is no good alternative. */ 197 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat) 199 { 200 PetscErrorCode ierr; 201 sparse_status_t stat; 202 sparse_index_base_t indexing; 203 PetscInt nrows, ncols; 204 PetscInt *aj,*ai,*dummy; 205 MatScalar *aa; 206 Mat A; 207 Mat_SeqAIJMKL *aijmkl; 208 209 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. 
*/ 210 stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 211 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 212 213 if (reuse == MAT_REUSE_MATRIX) { 214 ierr = MatDestroy(mat);CHKERRQ(ierr); 215 } 216 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 217 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 218 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 219 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 220 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 221 * they will be destroyed when the MKL handle is destroyed. 222 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 223 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr); 224 225 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 226 * Now turn it into a MATSEQAIJMKL. */ 227 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 228 229 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 230 aijmkl->csrA = csrA; 231 232 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 233 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 234 * and just need to be able to run the MKL optimization step. 
*/ 235 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 236 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 237 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 238 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 239 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 240 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 241 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 242 if (!aijmkl->no_SpMV2) { 243 stat = mkl_sparse_optimize(aijmkl->csrA); 244 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 245 } 246 aijmkl->sparse_optimized = PETSC_TRUE; 247 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 248 249 *mat = A; 250 PetscFunctionReturn(0); 251 } 252 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 253 254 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 255 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 256 * MatMatMultNumeric(). */ 257 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 258 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 259 { 260 PetscInt i; 261 PetscInt nrows,ncols; 262 PetscInt nz; 263 PetscInt *ai,*aj,*dummy; 264 PetscScalar *aa; 265 PetscErrorCode ierr; 266 Mat_SeqAIJMKL *aijmkl; 267 sparse_status_t stat; 268 sparse_index_base_t indexing; 269 270 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 271 272 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. 
*/ 273 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 274 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 275 276 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 277 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 278 for (i=0; i<nrows; i++) { 279 nz = ai[i+1] - ai[i]; 280 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 281 } 282 283 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 286 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 287 /* We mark our matrix as having a valid, optimized MKL handle. 288 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. 
*/ 289 aijmkl->sparse_optimized = PETSC_TRUE; 290 291 PetscFunctionReturn(0); 292 } 293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 294 295 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 296 { 297 PetscErrorCode ierr; 298 Mat_SeqAIJMKL *aijmkl; 299 Mat_SeqAIJMKL *aijmkl_dest; 300 301 PetscFunctionBegin; 302 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 303 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 304 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 305 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 306 aijmkl_dest->sparse_optimized = PETSC_FALSE; 307 if (aijmkl->eager_inspection) { 308 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 309 } 310 PetscFunctionReturn(0); 311 } 312 313 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 314 { 315 PetscErrorCode ierr; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 317 Mat_SeqAIJMKL *aijmkl; 318 319 PetscFunctionBegin; 320 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 321 322 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 323 * extra information and some different methods, call the AssemblyEnd 324 * routine for a MATSEQAIJ. 325 * I'm not sure if this is the best way to do this, but it avoids 326 * a lot of code duplication. */ 327 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 328 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 329 330 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 331 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) 
*/ 332 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 333 if (aijmkl->eager_inspection) { 334 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 335 } 336 337 PetscFunctionReturn(0); 338 } 339 340 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 341 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 342 { 343 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 344 const PetscScalar *x; 345 PetscScalar *y; 346 const MatScalar *aa; 347 PetscErrorCode ierr; 348 PetscInt m=A->rmap->n; 349 PetscInt n=A->cmap->n; 350 PetscScalar alpha = 1.0; 351 PetscScalar beta = 0.0; 352 const PetscInt *aj,*ai; 353 char matdescra[6]; 354 355 356 /* Variables not in MatMult_SeqAIJ. */ 357 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 358 359 PetscFunctionBegin; 360 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 361 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 362 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 363 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 364 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 365 aa = a->a; /* Nonzero elements stored row-by-row. */ 366 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 367 368 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 369 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 370 371 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 372 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 373 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 #endif 377 378 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 379 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 380 { 381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 382 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 383 const PetscScalar *x; 384 PetscScalar *y; 385 PetscErrorCode ierr; 386 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 387 PetscObjectState state; 388 389 PetscFunctionBegin; 390 391 /* If there are no nonzero entries, zero yy and return immediately. */ 392 if(!a->nz) { 393 PetscInt i; 394 PetscInt m=A->rmap->n; 395 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 396 for (i=0; i<m; i++) { 397 y[i] = 0.0; 398 } 399 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 400 PetscFunctionReturn(0); 401 } 402 403 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 404 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 405 406 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 407 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 408 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 409 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 410 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 411 MatSeqAIJMKL_create_mkl_handle(A); 412 } 413 414 /* Call MKL SpMV2 executor routine to do the MatMult. 
*/ 415 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 416 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 417 418 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 424 425 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 426 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 427 { 428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 429 const PetscScalar *x; 430 PetscScalar *y; 431 const MatScalar *aa; 432 PetscErrorCode ierr; 433 PetscInt m=A->rmap->n; 434 PetscInt n=A->cmap->n; 435 PetscScalar alpha = 1.0; 436 PetscScalar beta = 0.0; 437 const PetscInt *aj,*ai; 438 char matdescra[6]; 439 440 /* Variables not in MatMultTranspose_SeqAIJ. */ 441 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 442 443 PetscFunctionBegin; 444 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 445 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 446 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 447 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 448 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 449 aa = a->a; /* Nonzero elements stored row-by-row. */ 450 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 451 452 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 453 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 454 455 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 456 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 457 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 #endif 461 462 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 463 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 464 { 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 467 const PetscScalar *x; 468 PetscScalar *y; 469 PetscErrorCode ierr; 470 sparse_status_t stat; 471 PetscObjectState state; 472 473 PetscFunctionBegin; 474 475 /* If there are no nonzero entries, zero yy and return immediately. */ 476 if(!a->nz) { 477 PetscInt i; 478 PetscInt n=A->cmap->n; 479 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 480 for (i=0; i<n; i++) { 481 y[i] = 0.0; 482 } 483 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 488 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 489 490 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 491 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 492 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 493 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 494 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 495 MatSeqAIJMKL_create_mkl_handle(A); 496 } 497 498 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. 
*/ 499 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 500 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 501 502 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 508 509 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 510 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 511 { 512 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 513 const PetscScalar *x; 514 PetscScalar *y,*z; 515 const MatScalar *aa; 516 PetscErrorCode ierr; 517 PetscInt m=A->rmap->n; 518 PetscInt n=A->cmap->n; 519 const PetscInt *aj,*ai; 520 PetscInt i; 521 522 /* Variables not in MatMultAdd_SeqAIJ. */ 523 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 524 PetscScalar alpha = 1.0; 525 PetscScalar beta; 526 char matdescra[6]; 527 528 PetscFunctionBegin; 529 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 530 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 531 532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 533 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 534 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 535 aa = a->a; /* Nonzero elements stored row-by-row. */ 536 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 537 538 /* Call MKL sparse BLAS routine to do the MatMult. */ 539 if (zz == yy) { 540 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 541 beta = 1.0; 542 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 543 } else { 544 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 
545 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 546 beta = 0.0; 547 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 548 for (i=0; i<m; i++) { 549 z[i] += y[i]; 550 } 551 } 552 553 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 554 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 555 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 #endif 559 560 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 561 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 562 { 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 565 const PetscScalar *x; 566 PetscScalar *y,*z; 567 PetscErrorCode ierr; 568 PetscInt m=A->rmap->n; 569 PetscInt i; 570 571 /* Variables not in MatMultAdd_SeqAIJ. */ 572 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 573 PetscObjectState state; 574 575 PetscFunctionBegin; 576 577 /* If there are no nonzero entries, set zz = yy and return immediately. */ 578 if(!a->nz) { 579 PetscInt i; 580 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 581 for (i=0; i<m; i++) { 582 z[i] = y[i]; 583 } 584 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 589 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 590 591 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 592 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 593 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 594 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 595 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 596 MatSeqAIJMKL_create_mkl_handle(A); 597 } 598 599 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 600 if (zz == yy) { 601 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 602 * with alpha and beta both set to 1.0. */ 603 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 604 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 605 } else { 606 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 607 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 608 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 609 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 610 for (i=0; i<m; i++) { 611 z[i] += y[i]; 612 } 613 } 614 615 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 616 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 617 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } 620 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 621 622 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 623 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 624 { 625 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 626 const PetscScalar *x; 627 PetscScalar *y,*z; 628 const MatScalar *aa; 629 PetscErrorCode ierr; 630 PetscInt m=A->rmap->n; 631 PetscInt n=A->cmap->n; 632 const PetscInt *aj,*ai; 633 PetscInt i; 634 635 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 636 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 637 PetscScalar alpha = 1.0; 638 PetscScalar beta; 639 char matdescra[6]; 640 641 PetscFunctionBegin; 642 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 643 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. 
*/ 644 645 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 646 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 647 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 648 aa = a->a; /* Nonzero elements stored row-by-row. */ 649 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 650 651 /* Call MKL sparse BLAS routine to do the MatMult. */ 652 if (zz == yy) { 653 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 654 beta = 1.0; 655 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 656 } else { 657 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 658 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 659 beta = 0.0; 660 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 661 for (i=0; i<n; i++) { 662 z[i] += y[i]; 663 } 664 } 665 666 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 667 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 #endif 672 673 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 674 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 675 { 676 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 677 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 678 const PetscScalar *x; 679 PetscScalar *y,*z; 680 PetscErrorCode ierr; 681 PetscInt n=A->cmap->n; 682 PetscInt i; 683 PetscObjectState state; 684 685 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 686 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 687 688 PetscFunctionBegin; 689 690 /* If there are no nonzero entries, set zz = yy and return immediately. 
*/ 691 if(!a->nz) { 692 PetscInt i; 693 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 694 for (i=0; i<n; i++) { 695 z[i] = y[i]; 696 } 697 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 703 704 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 705 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 706 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 707 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 708 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 709 MatSeqAIJMKL_create_mkl_handle(A); 710 } 711 712 /* Call MKL sparse BLAS routine to do the MatMult. */ 713 if (zz == yy) { 714 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 715 * with alpha and beta both set to 1.0. */ 716 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 717 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 718 } else { 719 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 720 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. 
*/ 721 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 722 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 723 for (i=0; i<n; i++) { 724 z[i] += y[i]; 725 } 726 } 727 728 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 729 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 730 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 734 735 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 736 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because 737 * the MatMatMult() interface code calls MatMatMultNumeric() in this case. 738 * For releases of MKL prior to version 18, update 2: 739 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the 740 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more 741 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing 742 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. 
*/ 743 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 744 { 745 Mat_SeqAIJMKL *a, *b; 746 sparse_matrix_t csrA, csrB, csrC; 747 PetscErrorCode ierr; 748 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 749 PetscObjectState state; 750 751 PetscFunctionBegin; 752 a = (Mat_SeqAIJMKL*)A->spptr; 753 b = (Mat_SeqAIJMKL*)B->spptr; 754 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 755 if (!a->sparse_optimized || a->state != state) { 756 MatSeqAIJMKL_create_mkl_handle(A); 757 } 758 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 759 if (!b->sparse_optimized || b->state != state) { 760 MatSeqAIJMKL_create_mkl_handle(B); 761 } 762 csrA = a->csrA; 763 csrB = b->csrA; 764 765 stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC); 766 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 767 768 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 769 770 PetscFunctionReturn(0); 771 } 772 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 773 774 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 775 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C) 776 { 777 Mat_SeqAIJMKL *a, *b, *c; 778 sparse_matrix_t csrA, csrB, csrC; 779 PetscErrorCode ierr; 780 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 781 struct matrix_descr descr_type_gen; 782 PetscObjectState state; 783 784 PetscFunctionBegin; 785 a = (Mat_SeqAIJMKL*)A->spptr; 786 b = (Mat_SeqAIJMKL*)B->spptr; 787 c = (Mat_SeqAIJMKL*)C->spptr; 788 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 789 if (!a->sparse_optimized || a->state != state) { 790 MatSeqAIJMKL_create_mkl_handle(A); 791 } 792 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 793 if (!b->sparse_optimized || b->state != state) { 794 MatSeqAIJMKL_create_mkl_handle(B); 795 } 796 csrA = a->csrA; 
797 csrB = b->csrA; 798 csrC = c->csrA; 799 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 800 801 stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA, 802 SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB, 803 SPARSE_STAGE_FINALIZE_MULT,&csrC); 804 805 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 806 807 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 808 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 809 810 PetscFunctionReturn(0); 811 } 812 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 813 814 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 815 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C) 816 { 817 Mat_SeqAIJMKL *a, *b; 818 sparse_matrix_t csrA, csrB, csrC; 819 PetscErrorCode ierr; 820 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 821 PetscObjectState state; 822 823 PetscFunctionBegin; 824 a = (Mat_SeqAIJMKL*)A->spptr; 825 b = (Mat_SeqAIJMKL*)B->spptr; 826 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 827 if (!a->sparse_optimized || a->state != state) { 828 MatSeqAIJMKL_create_mkl_handle(A); 829 } 830 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 831 if (!b->sparse_optimized || b->state != state) { 832 MatSeqAIJMKL_create_mkl_handle(B); 833 } 834 csrA = a->csrA; 835 csrB = b->csrA; 836 837 stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC); 838 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply"); 839 840 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 841 842 PetscFunctionReturn(0); 843 } 844 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 845 846 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 847 PetscErrorCode 
MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C) 848 { 849 Mat_SeqAIJMKL *a, *p, *c; 850 sparse_matrix_t csrA, csrP, csrC; 851 PetscBool set, flag; 852 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 853 struct matrix_descr descr_type_sym; 854 PetscObjectState state; 855 PetscErrorCode ierr; 856 857 PetscFunctionBegin; 858 ierr = MatIsSymmetricKnown(A,&set,&flag); 859 if (!set || (set && !flag)) { 860 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 a = (Mat_SeqAIJMKL*)A->spptr; 865 p = (Mat_SeqAIJMKL*)P->spptr; 866 c = (Mat_SeqAIJMKL*)C->spptr; 867 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 868 if (!a->sparse_optimized || a->state != state) { 869 MatSeqAIJMKL_create_mkl_handle(A); 870 } 871 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 872 if (!p->sparse_optimized || p->state != state) { 873 MatSeqAIJMKL_create_mkl_handle(P); 874 } 875 csrA = a->csrA; 876 csrP = p->csrA; 877 csrC = c->csrA; 878 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 879 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 880 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 881 882 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 883 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 884 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 885 886 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. 
*/ 887 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 888 889 PetscFunctionReturn(0); 890 } 891 #endif 892 893 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 894 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 895 { 896 Mat_SeqAIJMKL *a, *p; 897 sparse_matrix_t csrA, csrP, csrC; 898 PetscBool set, flag; 899 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 900 struct matrix_descr descr_type_sym; 901 PetscObjectState state; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatIsSymmetricKnown(A,&set,&flag); 906 if (!set || (set && !flag)) { 907 ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 if (scall == MAT_REUSE_MATRIX) { 912 ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 a = (Mat_SeqAIJMKL*)A->spptr; 917 p = (Mat_SeqAIJMKL*)P->spptr; 918 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 919 if (!a->sparse_optimized || a->state != state) { 920 MatSeqAIJMKL_create_mkl_handle(A); 921 } 922 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 923 if (!p->sparse_optimized || p->state != state) { 924 MatSeqAIJMKL_create_mkl_handle(P); 925 } 926 csrA = a->csrA; 927 csrP = p->csrA; 928 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 929 descr_type_sym.mode = SPARSE_FILL_MODE_LOWER; 930 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 931 932 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) 
*/ 933 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT); 934 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr"); 935 936 ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr); 937 ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 938 939 PetscFunctionReturn(0); 940 } 941 #endif 942 943 /* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */ 944 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 945 946 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 947 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 948 * routine, but can also be used to convert an assembled SeqAIJ matrix 949 * into a SeqAIJMKL one. */ 950 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 951 { 952 PetscErrorCode ierr; 953 Mat B = *newmat; 954 Mat_SeqAIJMKL *aijmkl; 955 PetscBool set; 956 PetscBool sametype; 957 958 PetscFunctionBegin; 959 if (reuse == MAT_INITIAL_MATRIX) { 960 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 961 } 962 963 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 964 if (sametype) PetscFunctionReturn(0); 965 966 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 967 B->spptr = (void*) aijmkl; 968 969 /* Set function pointers for methods that we inherit from AIJ but override. 970 * We also parse some command line options below, since those determine some of the methods we point to. */ 971 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 972 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 973 B->ops->destroy = MatDestroy_SeqAIJMKL; 974 975 aijmkl->sparse_optimized = PETSC_FALSE; 976 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 977 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. 
*/ 978 #else 979 aijmkl->no_SpMV2 = PETSC_TRUE; 980 #endif 981 aijmkl->eager_inspection = PETSC_FALSE; 982 983 /* Parse command line options. */ 984 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 985 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 986 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 987 ierr = PetscOptionsEnd();CHKERRQ(ierr); 988 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 989 if(!aijmkl->no_SpMV2) { 990 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 991 aijmkl->no_SpMV2 = PETSC_TRUE; 992 } 993 #endif 994 995 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 996 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 997 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 998 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 999 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1000 B->ops->matmult = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1001 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1002 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1003 # if !defined(PETSC_USE_COMPLEX) 1004 B->ops->ptap = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2; 1005 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2; 1006 # endif 1007 # endif 1008 B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2; 1009 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1010 1011 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1012 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. 
If "no_SpMV2" has been specified by the 1013 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1014 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1015 * versions in which the older interface has not been deprecated, we use the old interface. */ 1016 if (aijmkl->no_SpMV2) { 1017 B->ops->mult = MatMult_SeqAIJMKL; 1018 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1019 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1020 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1021 } 1022 #endif 1023 1024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 1027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 1028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 1029 if(!aijmkl->no_SpMV2) { 1030 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1032 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1034 #endif 1035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr); 1036 #endif 1037 } 1038 1039 ierr = 
PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the matrix, set its type to SEQAIJMKL (which triggers the SeqAIJ->SeqAIJMKL
   * conversion), then preallocate using the inherited SeqAIJ preallocation routine. */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Registered constructor for MATSEQAIJMKL: build the matrix as SEQAIJ first,
 * then convert it in place to the MKL-accelerated subtype. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}