1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 Mat B = *newmat; 32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 33 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 34 #endif 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 53 54 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL)); 55 56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58 * simply involves destroying the MKL sparse matrix handle and then freeing 59 * the spptr pointer. */ 60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61 62 if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 64 PetscCall(PetscFree(B->spptr)); 65 66 /* Change the type of B to MATSEQAIJ. */ 67 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 68 69 *newmat = B; 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 76 77 PetscFunctionBegin; 78 79 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80 if (aijmkl) { 81 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 83 if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85 PetscCall(PetscFree(A->spptr)); 86 } 87 88 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 89 * to destroy everything that remains. */ 90 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 91 /* Note that I don't call MatSetType(). I believe this is because that 92 * is only to be called when *building* a matrix. I could be wrong, but 93 * that is how things work for the SuperLU matrix class. */ 94 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaijmkl_seqaij_C",NULL)); 95 PetscCall(MatDestroy_SeqAIJ(A)); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 100 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 101 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 102 * handle, creates a new one, and then calls mkl_sparse_optimize(). 103 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 104 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 105 * an unoptimized matrix handle here. */ 106 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 107 { 108 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 109 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 110 * does nothing. We make it callable anyway in this case because it cuts 111 * down on littering the code with #ifdefs. */ 112 PetscFunctionBegin; 113 PetscFunctionReturn(0); 114 #else 115 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 116 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 117 PetscInt m,n; 118 MatScalar *aa; 119 PetscInt *aj,*ai; 120 121 PetscFunctionBegin; 122 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 123 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 124 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 125 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 126 * mkl_sparse_optimize() later. */ 127 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 128 #endif 129 130 if (aijmkl->sparse_optimized) { 131 /* Matrix has been previously assembled and optimized. Must destroy old 132 * matrix handle before running the optimization step again. */ 133 PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 134 } 135 aijmkl->sparse_optimized = PETSC_FALSE; 136 137 /* Now perform the SpMV2 setup and matrix optimization. */ 138 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 139 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 140 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 141 m = A->rmap->n; 142 n = A->cmap->n; 143 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 144 aa = a->a; /* Nonzero elements stored row-by-row. */ 145 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 146 if (a->nz && aa && !A->structure_only) { 147 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 148 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 149 PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 150 PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 151 PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 152 if (!aijmkl->no_SpMV2) { 153 PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA); 154 } 155 aijmkl->sparse_optimized = PETSC_TRUE; 156 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 157 } else { 158 aijmkl->csrA = PETSC_NULL; 159 } 160 161 PetscFunctionReturn(0); 162 #endif 163 } 164 165 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 166 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 167 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 168 { 169 sparse_index_base_t indexing; 170 PetscInt m,n; 171 PetscInt *aj,*ai,*dummy; 172 MatScalar *aa; 173 Mat_SeqAIJMKL *aijmkl; 174 175 PetscFunctionBegin; 176 if (csrA) { 177 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 178 PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 179 PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 180 } else { 181 aj = ai = PETSC_NULL; 182 aa = PETSC_NULL; 183 } 184 185 PetscCall(MatSetType(A,MATSEQAIJ)); 186 PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols)); 187 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 188 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 189 * they will be destroyed when the MKL handle is destroyed. 190 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 191 if (csrA) { 192 PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL)); 193 } else { 194 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 195 PetscCall(MatSetUp(A)); 196 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 197 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 198 } 199 200 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 201 * Now turn it into a MATSEQAIJMKL. */ 202 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 203 204 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 205 aijmkl->csrA = csrA; 206 207 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 208 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 209 * and just need to be able to run the MKL optimization step. */ 210 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 211 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 212 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 213 if (csrA) { 214 PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 215 PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 216 } 217 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 218 PetscFunctionReturn(0); 219 } 220 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 221 222 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 223 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 224 * MatMatMultNumeric(). */ 225 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 226 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 227 { 228 PetscInt i; 229 PetscInt nrows,ncols; 230 PetscInt nz; 231 PetscInt *ai,*aj,*dummy; 232 PetscScalar *aa; 233 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 234 sparse_index_base_t indexing; 235 236 PetscFunctionBegin; 237 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 238 if (!aijmkl->csrA) PetscFunctionReturn(0); 239 240 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 241 PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 242 243 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 244 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 245 for (i=0; i<nrows; i++) { 246 nz = ai[i+1] - ai[i]; 247 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES)); 248 } 249 250 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 251 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 252 253 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 254 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 255 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 256 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 257 aijmkl->sparse_optimized = PETSC_FALSE; 258 PetscFunctionReturn(0); 259 } 260 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 261 262 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 263 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 264 { 265 PetscInt i,j,k; 266 PetscInt nrows,ncols; 267 PetscInt nz; 268 PetscInt *ai,*aj,*dummy; 269 PetscScalar *aa; 270 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 271 sparse_index_base_t indexing; 272 273 PetscFunctionBegin; 274 PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 275 276 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 277 if (!aijmkl->csrA) { 278 PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n")); 279 PetscFunctionReturn(0); 280 } 281 282 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 283 PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 284 285 k = 0; 286 for (i=0; i<nrows; i++) { 287 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i)); 288 nz = ai[i+1] - ai[i]; 289 for (j=0; j<nz; j++) { 290 if (aa) { 291 PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g) ",aj[k],PetscRealPart(aa[k]))); 292 } else { 293 PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k])); 294 } 295 k++; 296 } 297 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 298 } 299 PetscFunctionReturn(0); 300 } 301 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 302 303 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 304 { 305 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 306 Mat_SeqAIJMKL *aijmkl_dest; 307 308 PetscFunctionBegin; 309 PetscCall(MatDuplicate_SeqAIJ(A,op,M)); 310 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 311 PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1)); 312 aijmkl_dest->sparse_optimized = PETSC_FALSE; 313 if (aijmkl->eager_inspection) { 314 PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 320 { 321 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 322 Mat_SeqAIJMKL *aijmkl; 323 324 PetscFunctionBegin; 325 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 326 327 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 328 * extra information and some different methods, call the AssemblyEnd 329 * routine for a MATSEQAIJ. 330 * I'm not sure if this is the best way to do this, but it avoids 331 * a lot of code duplication. */ 332 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 333 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 334 335 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 336 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 337 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 338 if (aijmkl->eager_inspection) { 339 PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 340 } 341 342 PetscFunctionReturn(0); 343 } 344 345 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 346 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 347 { 348 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 349 const PetscScalar *x; 350 PetscScalar *y; 351 const MatScalar *aa; 352 PetscInt m = A->rmap->n; 353 PetscInt n = A->cmap->n; 354 PetscScalar alpha = 1.0; 355 PetscScalar beta = 0.0; 356 const PetscInt *aj,*ai; 357 char matdescra[6]; 358 359 /* Variables not in MatMult_SeqAIJ. */ 360 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 361 362 PetscFunctionBegin; 363 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 364 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 365 PetscCall(VecGetArrayRead(xx,&x)); 366 PetscCall(VecGetArray(yy,&y)); 367 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 368 aa = a->a; /* Nonzero elements stored row-by-row. */ 369 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 370 371 /* Call MKL sparse BLAS routine to do the MatMult. */ 372 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 373 374 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 375 PetscCall(VecRestoreArrayRead(xx,&x)); 376 PetscCall(VecRestoreArray(yy,&y)); 377 PetscFunctionReturn(0); 378 } 379 #endif 380 381 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 382 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 383 { 384 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 385 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 386 const PetscScalar *x; 387 PetscScalar *y; 388 PetscObjectState state; 389 390 PetscFunctionBegin; 391 392 /* If there are no nonzero entries, zero yy and return immediately. */ 393 if (!a->nz) { 394 PetscCall(VecGetArray(yy,&y)); 395 PetscCall(PetscArrayzero(y,A->rmap->n)); 396 PetscCall(VecRestoreArray(yy,&y)); 397 PetscFunctionReturn(0); 398 } 399 400 PetscCall(VecGetArrayRead(xx,&x)); 401 PetscCall(VecGetArray(yy,&y)); 402 403 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 404 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 405 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 406 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 407 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 408 409 /* Call MKL SpMV2 executor routine to do the MatMult. */ 410 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 411 412 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 413 PetscCall(VecRestoreArrayRead(xx,&x)); 414 PetscCall(VecRestoreArray(yy,&y)); 415 PetscFunctionReturn(0); 416 } 417 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 418 419 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 420 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 421 { 422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 423 const PetscScalar *x; 424 PetscScalar *y; 425 const MatScalar *aa; 426 PetscInt m = A->rmap->n; 427 PetscInt n = A->cmap->n; 428 PetscScalar alpha = 1.0; 429 PetscScalar beta = 0.0; 430 const PetscInt *aj,*ai; 431 char matdescra[6]; 432 433 /* Variables not in MatMultTranspose_SeqAIJ. */ 434 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 435 436 PetscFunctionBegin; 437 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 438 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 439 PetscCall(VecGetArrayRead(xx,&x)); 440 PetscCall(VecGetArray(yy,&y)); 441 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 442 aa = a->a; /* Nonzero elements stored row-by-row. */ 443 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 444 445 /* Call MKL sparse BLAS routine to do the MatMult. */ 446 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 447 448 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 449 PetscCall(VecRestoreArrayRead(xx,&x)); 450 PetscCall(VecRestoreArray(yy,&y)); 451 PetscFunctionReturn(0); 452 } 453 #endif 454 455 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 456 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 457 { 458 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 459 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 460 const PetscScalar *x; 461 PetscScalar *y; 462 PetscObjectState state; 463 464 PetscFunctionBegin; 465 466 /* If there are no nonzero entries, zero yy and return immediately. */ 467 if (!a->nz) { 468 PetscCall(VecGetArray(yy,&y)); 469 PetscCall(PetscArrayzero(y,A->cmap->n)); 470 PetscCall(VecRestoreArray(yy,&y)); 471 PetscFunctionReturn(0); 472 } 473 474 PetscCall(VecGetArrayRead(xx,&x)); 475 PetscCall(VecGetArray(yy,&y)); 476 477 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 478 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 479 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 480 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 481 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 482 483 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 484 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 485 486 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 487 PetscCall(VecRestoreArrayRead(xx,&x)); 488 PetscCall(VecRestoreArray(yy,&y)); 489 PetscFunctionReturn(0); 490 } 491 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 492 493 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 494 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 495 { 496 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 497 const PetscScalar *x; 498 PetscScalar *y,*z; 499 const MatScalar *aa; 500 PetscInt m = A->rmap->n; 501 PetscInt n = A->cmap->n; 502 const PetscInt *aj,*ai; 503 PetscInt i; 504 505 /* Variables not in MatMultAdd_SeqAIJ. */ 506 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 507 PetscScalar alpha = 1.0; 508 PetscScalar beta; 509 char matdescra[6]; 510 511 PetscFunctionBegin; 512 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 513 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 514 515 PetscCall(VecGetArrayRead(xx,&x)); 516 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 517 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 518 aa = a->a; /* Nonzero elements stored row-by-row. */ 519 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 520 521 /* Call MKL sparse BLAS routine to do the MatMult. */ 522 if (zz == yy) { 523 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 524 beta = 1.0; 525 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 526 } else { 527 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 528 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 529 beta = 0.0; 530 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 531 for (i=0; i<m; i++) { 532 z[i] += y[i]; 533 } 534 } 535 536 PetscCall(PetscLogFlops(2.0*a->nz)); 537 PetscCall(VecRestoreArrayRead(xx,&x)); 538 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 539 PetscFunctionReturn(0); 540 } 541 #endif 542 543 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 544 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 545 { 546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 547 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 548 const PetscScalar *x; 549 PetscScalar *y,*z; 550 PetscInt m = A->rmap->n; 551 PetscInt i; 552 553 /* Variables not in MatMultAdd_SeqAIJ. */ 554 PetscObjectState state; 555 556 PetscFunctionBegin; 557 558 /* If there are no nonzero entries, set zz = yy and return immediately. */ 559 if (!a->nz) { 560 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 561 PetscCall(PetscArraycpy(z,y,m)); 562 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 563 PetscFunctionReturn(0); 564 } 565 566 PetscCall(VecGetArrayRead(xx,&x)); 567 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 568 569 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 570 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 571 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 572 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 573 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 574 575 /* Call MKL sparse BLAS routine to do the MatMult. */ 576 if (zz == yy) { 577 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 578 * with alpha and beta both set to 1.0. */ 579 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 580 } else { 581 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 582 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 583 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 584 for (i=0; i<m; i++) z[i] += y[i]; 585 } 586 587 PetscCall(PetscLogFlops(2.0*a->nz)); 588 PetscCall(VecRestoreArrayRead(xx,&x)); 589 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 590 PetscFunctionReturn(0); 591 } 592 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 593 594 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 595 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 598 const PetscScalar *x; 599 PetscScalar *y,*z; 600 const MatScalar *aa; 601 PetscInt m = A->rmap->n; 602 PetscInt n = A->cmap->n; 603 const PetscInt *aj,*ai; 604 PetscInt i; 605 606 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 607 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 608 PetscScalar alpha = 1.0; 609 PetscScalar beta; 610 char matdescra[6]; 611 612 PetscFunctionBegin; 613 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 614 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 615 616 PetscCall(VecGetArrayRead(xx,&x)); 617 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 618 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 619 aa = a->a; /* Nonzero elements stored row-by-row. */ 620 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 621 622 /* Call MKL sparse BLAS routine to do the MatMult. */ 623 if (zz == yy) { 624 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 625 beta = 1.0; 626 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 627 } else { 628 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 629 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 630 beta = 0.0; 631 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 632 for (i=0; i<n; i++) { 633 z[i] += y[i]; 634 } 635 } 636 637 PetscCall(PetscLogFlops(2.0*a->nz)); 638 PetscCall(VecRestoreArrayRead(xx,&x)); 639 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 640 PetscFunctionReturn(0); 641 } 642 #endif 643 644 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 645 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 646 { 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 648 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 649 const PetscScalar *x; 650 PetscScalar *y,*z; 651 PetscInt n = A->cmap->n; 652 PetscInt i; 653 PetscObjectState state; 654 655 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 656 657 PetscFunctionBegin; 658 659 /* If there are no nonzero entries, set zz = yy and return immediately. */ 660 if (!a->nz) { 661 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 662 PetscCall(PetscArraycpy(z,y,n)); 663 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 664 PetscFunctionReturn(0); 665 } 666 667 PetscCall(VecGetArrayRead(xx,&x)); 668 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 669 670 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 671 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 672 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 673 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 674 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 675 676 /* Call MKL sparse BLAS routine to do the MatMult. */ 677 if (zz == yy) { 678 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 679 * with alpha and beta both set to 1.0. */ 680 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 681 } else { 682 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 683 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 684 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 685 for (i=0; i<n; i++) z[i] += y[i]; 686 } 687 688 PetscCall(PetscLogFlops(2.0*a->nz)); 689 PetscCall(VecRestoreArrayRead(xx,&x)); 690 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 691 PetscFunctionReturn(0); 692 } 693 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 694 695 /* -------------------------- MatProduct code -------------------------- */ 696 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 697 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 698 { 699 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 700 sparse_matrix_t csrA,csrB,csrC; 701 PetscInt nrows,ncols; 702 struct matrix_descr descr_type_gen; 703 PetscObjectState state; 704 705 PetscFunctionBegin; 706 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 707 * not handle sparse matrices with zero rows or columns. */ 708 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 709 else nrows = A->cmap->N; 710 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 711 else ncols = B->rmap->N; 712 713 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 714 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 715 PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 716 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 717 csrA = a->csrA; 718 csrB = b->csrA; 719 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 720 721 if (csrA && csrB) { 722 PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 723 } else { 724 csrC = PETSC_NULL; 725 } 726 727 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C)); 728 729 PetscFunctionReturn(0); 730 } 731 732 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 733 { 734 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 735 sparse_matrix_t csrA, csrB, csrC; 736 struct matrix_descr descr_type_gen; 737 PetscObjectState state; 738 739 PetscFunctionBegin; 740 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 741 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 742 PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 743 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 744 csrA = a->csrA; 745 csrB = b->csrA; 746 csrC = c->csrA; 747 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 748 749 if (csrA && csrB) { 750 PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 751 } else { 752 csrC = PETSC_NULL; 753 } 754 755 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 756 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 757 758 PetscFunctionReturn(0); 759 } 760 761 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 762 { 763 PetscFunctionBegin; 764 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 765 PetscFunctionReturn(0); 766 } 767 768 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 769 { 770 PetscFunctionBegin; 771 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 772 PetscFunctionReturn(0); 773 } 774 775 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 776 { 777 PetscFunctionBegin; 778 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 779 PetscFunctionReturn(0); 780 } 781 782 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 783 { 784 PetscFunctionBegin; 785 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 790 { 791 PetscFunctionBegin; 792 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 793 PetscFunctionReturn(0); 794 } 795 796 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 797 { 798 PetscFunctionBegin; 799 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 800 PetscFunctionReturn(0); 801 } 802 803 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 804 { 805 Mat_Product *product = C->product; 806 Mat A = product->A,B = product->B; 807 808 PetscFunctionBegin; 809 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C)); 810 PetscFunctionReturn(0); 811 } 812 813 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 814 { 815 Mat_Product *product = C->product; 816 Mat A = product->A,B = product->B; 817 PetscReal fill = product->fill; 818 819 PetscFunctionBegin; 820 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C)); 821 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 822 PetscFunctionReturn(0); 823 } 824 825 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 826 { 827 Mat Ct; 828 Vec zeros; 829 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 830 sparse_matrix_t csrA, csrP, csrC; 831 PetscBool set, flag; 832 struct matrix_descr descr_type_sym; 833 PetscObjectState state; 834 835 PetscFunctionBegin; 836 PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 837 PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 838 839 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 840 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 841 PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 842 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 843 csrA = a->csrA; 844 csrP = p->csrA; 845 csrC = c->csrA; 846 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 847 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 848 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 849 850 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 851 PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 852 853 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 854 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 855 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 856 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 857 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 858 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 859 * the full matrix. */ 860 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 861 PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct)); 862 PetscCall(MatCreateVecs(C,&zeros,NULL)); 863 PetscCall(VecSetFromOptions(zeros)); 864 PetscCall(VecZeroEntries(zeros)); 865 PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES)); 866 PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN)); 867 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 868 PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C)); 869 PetscCall(MatProductSetType(C,MATPRODUCT_PtAP)); 870 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 871 PetscCall(VecDestroy(&zeros)); 872 PetscCall(MatDestroy(&Ct)); 873 PetscFunctionReturn(0); 874 } 875 876 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 877 { 878 Mat_Product *product = C->product; 879 Mat A = product->A,P = product->B; 880 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 881 sparse_matrix_t csrA,csrP,csrC; 882 struct matrix_descr descr_type_sym; 883 PetscObjectState state; 884 885 PetscFunctionBegin; 886 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 887 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 888 PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 889 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 890 csrA = a->csrA; 891 csrP = p->csrA; 892 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 893 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 894 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 895 896 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 897 if (csrP && csrA) { 898 PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 899 } else { 900 csrC = PETSC_NULL; 901 } 902 903 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 904 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 905 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 906 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 907 * is guaranteed. */ 908 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C)); 909 910 C->ops->productnumeric = MatProductNumeric_PtAP; 911 PetscFunctionReturn(0); 912 } 913 914 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 915 { 916 PetscFunctionBegin; 917 C->ops->productsymbolic = MatProductSymbolic_AB; 918 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 919 PetscFunctionReturn(0); 920 } 921 922 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 923 { 924 PetscFunctionBegin; 925 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 926 PetscFunctionReturn(0); 927 } 928 929 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 930 { 931 PetscFunctionBegin; 932 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 933 C->ops->productsymbolic = MatProductSymbolic_ABt; 934 PetscFunctionReturn(0); 935 } 936 937 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 938 { 939 Mat_Product *product = C->product; 940 Mat A = product->A; 941 PetscBool set, flag; 942 943 PetscFunctionBegin; 944 if (PetscDefined(USE_COMPLEX)) { 945 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 946 * We do this in several other locations in this file. This works for the time being, but these 947 * routines are considered unsafe and may be removed from the MatProduct code in the future. 948 * TODO: Add proper MATSEQAIJMKL implementations */ 949 C->ops->productsymbolic = NULL; 950 } else { 951 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 952 PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 953 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 954 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 955 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 956 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 957 } 958 PetscFunctionReturn(0); 959 } 960 961 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 962 { 963 PetscFunctionBegin; 964 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 965 PetscFunctionReturn(0); 966 } 967 968 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 969 { 970 PetscFunctionBegin; 971 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 972 PetscFunctionReturn(0); 973 } 974 975 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 976 { 977 Mat_Product *product = C->product; 978 979 PetscFunctionBegin; 980 switch (product->type) { 981 case MATPRODUCT_AB: 982 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 983 break; 984 case MATPRODUCT_AtB: 985 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 986 break; 987 case MATPRODUCT_ABt: 988 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 989 break; 990 case MATPRODUCT_PtAP: 991 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 992 break; 993 case MATPRODUCT_RARt: 994 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 995 break; 996 case MATPRODUCT_ABC: 997 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 998 break; 999 default: 1000 break; 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1005 /* ------------------------ End MatProduct code ------------------------ */ 1006 1007 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1008 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1009 * routine, but can also be used to convert an assembled SeqAIJ matrix 1010 * into a SeqAIJMKL one. */ 1011 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1012 { 1013 Mat B = *newmat; 1014 Mat_SeqAIJMKL *aijmkl; 1015 PetscBool set; 1016 PetscBool sametype; 1017 1018 PetscFunctionBegin; 1019 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 1020 1021 PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 1022 if (sametype) PetscFunctionReturn(0); 1023 1024 PetscCall(PetscNewLog(B,&aijmkl)); 1025 B->spptr = (void*) aijmkl; 1026 1027 /* Set function pointers for methods that we inherit from AIJ but override. 1028 * We also parse some command line options below, since those determine some of the methods we point to. */ 1029 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1030 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1031 B->ops->destroy = MatDestroy_SeqAIJMKL; 1032 1033 aijmkl->sparse_optimized = PETSC_FALSE; 1034 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1035 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1036 #else 1037 aijmkl->no_SpMV2 = PETSC_TRUE; 1038 #endif 1039 aijmkl->eager_inspection = PETSC_FALSE; 1040 1041 /* Parse command line options. */ 1042 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat"); 1043 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set)); 1044 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set)); 1045 PetscOptionsEnd(); 1046 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1047 if (!aijmkl->no_SpMV2) { 1048 PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1049 aijmkl->no_SpMV2 = PETSC_TRUE; 1050 } 1051 #endif 1052 1053 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1054 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1055 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1056 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1057 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1058 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1059 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1060 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1061 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1062 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1063 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1064 # if !defined(PETSC_USE_COMPLEX) 1065 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1066 # else 1067 B->ops->ptapnumeric = NULL; 1068 # endif 1069 # endif 1070 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1071 1072 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1073 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1074 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1075 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1076 * versions in which the older interface has not been deprecated, we use the old interface. */ 1077 if (aijmkl->no_SpMV2) { 1078 B->ops->mult = MatMult_SeqAIJMKL; 1079 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1080 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1081 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1082 } 1083 #endif 1084 1085 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ)); 1086 1087 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL)); 1088 *newmat = B; 1089 PetscFunctionReturn(0); 1090 } 1091 1092 /*@C 1093 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1094 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1095 routines from Intel MKL whenever possible. 1096 If the installed version of MKL supports the "SpMV2" sparse 1097 inspector-executor routines, then those are used by default. 1098 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1099 symmetric A) operations are currently supported. 1100 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1101 1102 Collective 1103 1104 Input Parameters: 1105 + comm - MPI communicator, set to PETSC_COMM_SELF 1106 . m - number of rows 1107 . n - number of columns 1108 . nz - number of nonzeros per row (same for all rows) 1109 - nnz - array containing the number of nonzeros in the various rows 1110 (possibly different for each row) or NULL 1111 1112 Output Parameter: 1113 . A - the matrix 1114 1115 Options Database Keys: 1116 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1117 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1118 1119 Notes: 1120 If nnz is given then nz is ignored 1121 1122 Level: intermediate 1123 1124 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1125 @*/ 1126 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1127 { 1128 PetscFunctionBegin; 1129 PetscCall(MatCreate(comm,A)); 1130 PetscCall(MatSetSizes(*A,m,n,m,n)); 1131 PetscCall(MatSetType(*A,MATSEQAIJMKL)); 1132 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz)); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1137 { 1138 PetscFunctionBegin; 1139 PetscCall(MatSetType(A,MATSEQAIJ)); 1140 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 1141 PetscFunctionReturn(0); 1142 } 1143