1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 Mat B = *newmat; 32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 33 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 34 #endif 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 53 54 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 55 56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58 * simply involves destroying the MKL sparse matrix handle and then freeing 59 * the spptr pointer. */ 60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 61 62 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 64 PetscCall(PetscFree(B->spptr)); 65 66 /* Change the type of B to MATSEQAIJ. */ 67 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 68 69 *newmat = B; 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 76 77 PetscFunctionBegin; 78 79 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80 if (aijmkl) { 81 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 83 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85 PetscCall(PetscFree(A->spptr)); 86 } 87 88 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 89 * to destroy everything that remains. */ 90 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 91 /* Note that I don't call MatSetType(). I believe this is because that 92 * is only to be called when *building* a matrix. I could be wrong, but 93 * that is how things work for the SuperLU matrix class. */ 94 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 95 PetscCall(MatDestroy_SeqAIJ(A)); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 100 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 101 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 102 * handle, creates a new one, and then calls mkl_sparse_optimize(). 103 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 104 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 105 * an unoptimized matrix handle here. */ 106 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 107 { 108 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 109 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 110 * does nothing. We make it callable anyway in this case because it cuts 111 * down on littering the code with #ifdefs. */ 112 PetscFunctionBegin; 113 PetscFunctionReturn(0); 114 #else 115 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 116 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 117 PetscInt m, n; 118 MatScalar *aa; 119 PetscInt *aj, *ai; 120 121 PetscFunctionBegin; 122 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 123 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 124 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 125 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 126 * mkl_sparse_optimize() later. */ 127 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 128 #endif 129 130 if (aijmkl->sparse_optimized) { 131 /* Matrix has been previously assembled and optimized. Must destroy old 132 * matrix handle before running the optimization step again. */ 133 PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 134 } 135 aijmkl->sparse_optimized = PETSC_FALSE; 136 137 /* Now perform the SpMV2 setup and matrix optimization. */ 138 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 139 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 140 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 141 m = A->rmap->n; 142 n = A->cmap->n; 143 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 144 aa = a->a; /* Nonzero elements stored row-by-row. */ 145 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 146 if (a->nz && aa && !A->structure_only) { 147 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 148 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 149 PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa); 150 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 151 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 152 if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 153 aijmkl->sparse_optimized = PETSC_TRUE; 154 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 155 } else { 156 aijmkl->csrA = PETSC_NULL; 157 } 158 159 PetscFunctionReturn(0); 160 #endif 161 } 162 163 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 164 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 165 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) 166 { 167 sparse_index_base_t indexing; 168 PetscInt m, n; 169 PetscInt *aj, *ai, *dummy; 170 MatScalar *aa; 171 Mat_SeqAIJMKL *aijmkl; 172 173 PetscFunctionBegin; 174 if (csrA) { 175 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 176 PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa); 177 PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 178 } else { 179 aj = ai = PETSC_NULL; 180 aa = PETSC_NULL; 181 } 182 183 PetscCall(MatSetType(A, MATSEQAIJ)); 184 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 185 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 186 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 187 * they will be destroyed when the MKL handle is destroyed. 188 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 189 if (csrA) { 190 PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 191 } else { 192 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 193 PetscCall(MatSetUp(A)); 194 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 195 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 196 } 197 198 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 199 * Now turn it into a MATSEQAIJMKL. */ 200 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 201 202 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 203 aijmkl->csrA = csrA; 204 205 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 206 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 207 * and just need to be able to run the MKL optimization step. */ 208 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 209 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 210 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 211 if (csrA) { 212 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 213 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 214 } 215 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 216 PetscFunctionReturn(0); 217 } 218 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 219 220 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 221 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 222 * MatMatMultNumeric(). */ 223 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 224 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 225 { 226 PetscInt i; 227 PetscInt nrows, ncols; 228 PetscInt nz; 229 PetscInt *ai, *aj, *dummy; 230 PetscScalar *aa; 231 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 232 sparse_index_base_t indexing; 233 234 PetscFunctionBegin; 235 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 236 if (!aijmkl->csrA) PetscFunctionReturn(0); 237 238 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 239 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 240 241 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 242 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 243 for (i = 0; i < nrows; i++) { 244 nz = ai[i + 1] - ai[i]; 245 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 246 } 247 248 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 249 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 250 251 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 252 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 253 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 254 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 255 aijmkl->sparse_optimized = PETSC_FALSE; 256 PetscFunctionReturn(0); 257 } 258 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 259 260 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 261 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) 262 { 263 PetscInt i, j, k; 264 PetscInt nrows, ncols; 265 PetscInt nz; 266 PetscInt *ai, *aj, *dummy; 267 PetscScalar *aa; 268 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 269 sparse_index_base_t indexing; 270 271 PetscFunctionBegin; 272 PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 273 274 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 275 if (!aijmkl->csrA) { 276 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 277 PetscFunctionReturn(0); 278 } 279 280 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 281 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 282 283 k = 0; 284 for (i = 0; i < nrows; i++) { 285 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 286 nz = ai[i + 1] - ai[i]; 287 for (j = 0; j < nz; j++) { 288 if (aa) { 289 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 290 } else { 291 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 292 } 293 k++; 294 } 295 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 296 } 297 PetscFunctionReturn(0); 298 } 299 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 300 301 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 302 { 303 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 304 Mat_SeqAIJMKL *aijmkl_dest; 305 306 PetscFunctionBegin; 307 PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 308 aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 309 PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 310 aijmkl_dest->sparse_optimized = PETSC_FALSE; 311 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 316 { 317 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 318 Mat_SeqAIJMKL *aijmkl; 319 320 PetscFunctionBegin; 321 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 322 323 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 324 * extra information and some different methods, call the AssemblyEnd 325 * routine for a MATSEQAIJ. 326 * I'm not sure if this is the best way to do this, but it avoids 327 * a lot of code duplication. */ 328 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 329 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 330 331 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 332 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 333 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 334 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 335 336 PetscFunctionReturn(0); 337 } 338 339 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 340 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) 341 { 342 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 343 const PetscScalar *x; 344 PetscScalar *y; 345 const MatScalar *aa; 346 PetscInt m = A->rmap->n; 347 PetscInt n = A->cmap->n; 348 PetscScalar alpha = 1.0; 349 PetscScalar beta = 0.0; 350 const PetscInt *aj, *ai; 351 char matdescra[6]; 352 353 /* Variables not in MatMult_SeqAIJ. */ 354 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 355 356 PetscFunctionBegin; 357 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 358 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 359 PetscCall(VecGetArrayRead(xx, &x)); 360 PetscCall(VecGetArray(yy, &y)); 361 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 362 aa = a->a; /* Nonzero elements stored row-by-row. */ 363 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 364 365 /* Call MKL sparse BLAS routine to do the MatMult. */ 366 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 367 368 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 369 PetscCall(VecRestoreArrayRead(xx, &x)); 370 PetscCall(VecRestoreArray(yy, &y)); 371 PetscFunctionReturn(0); 372 } 373 #endif 374 375 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 376 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 377 { 378 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 379 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 380 const PetscScalar *x; 381 PetscScalar *y; 382 PetscObjectState state; 383 384 PetscFunctionBegin; 385 386 /* If there are no nonzero entries, zero yy and return immediately. */ 387 if (!a->nz) { 388 PetscCall(VecGetArray(yy, &y)); 389 PetscCall(PetscArrayzero(y, A->rmap->n)); 390 PetscCall(VecRestoreArray(yy, &y)); 391 PetscFunctionReturn(0); 392 } 393 394 PetscCall(VecGetArrayRead(xx, &x)); 395 PetscCall(VecGetArray(yy, &y)); 396 397 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 398 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 399 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 400 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 401 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 402 403 /* Call MKL SpMV2 executor routine to do the MatMult. */ 404 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 405 406 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 407 PetscCall(VecRestoreArrayRead(xx, &x)); 408 PetscCall(VecRestoreArray(yy, &y)); 409 PetscFunctionReturn(0); 410 } 411 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 412 413 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 414 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) 415 { 416 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 417 const PetscScalar *x; 418 PetscScalar *y; 419 const MatScalar *aa; 420 PetscInt m = A->rmap->n; 421 PetscInt n = A->cmap->n; 422 PetscScalar alpha = 1.0; 423 PetscScalar beta = 0.0; 424 const PetscInt *aj, *ai; 425 char matdescra[6]; 426 427 /* Variables not in MatMultTranspose_SeqAIJ. */ 428 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 429 430 PetscFunctionBegin; 431 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 432 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 433 PetscCall(VecGetArrayRead(xx, &x)); 434 PetscCall(VecGetArray(yy, &y)); 435 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 436 aa = a->a; /* Nonzero elements stored row-by-row. */ 437 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 438 439 /* Call MKL sparse BLAS routine to do the MatMult. */ 440 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 441 442 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 443 PetscCall(VecRestoreArrayRead(xx, &x)); 444 PetscCall(VecRestoreArray(yy, &y)); 445 PetscFunctionReturn(0); 446 } 447 #endif 448 449 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 450 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 451 { 452 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 453 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 454 const PetscScalar *x; 455 PetscScalar *y; 456 PetscObjectState state; 457 458 PetscFunctionBegin; 459 460 /* If there are no nonzero entries, zero yy and return immediately. */ 461 if (!a->nz) { 462 PetscCall(VecGetArray(yy, &y)); 463 PetscCall(PetscArrayzero(y, A->cmap->n)); 464 PetscCall(VecRestoreArray(yy, &y)); 465 PetscFunctionReturn(0); 466 } 467 468 PetscCall(VecGetArrayRead(xx, &x)); 469 PetscCall(VecGetArray(yy, &y)); 470 471 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 472 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 473 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 474 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 475 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 476 477 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 478 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 479 480 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 481 PetscCall(VecRestoreArrayRead(xx, &x)); 482 PetscCall(VecRestoreArray(yy, &y)); 483 PetscFunctionReturn(0); 484 } 485 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 486 487 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 488 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 489 { 490 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 491 const PetscScalar *x; 492 PetscScalar *y, *z; 493 const MatScalar *aa; 494 PetscInt m = A->rmap->n; 495 PetscInt n = A->cmap->n; 496 const PetscInt *aj, *ai; 497 PetscInt i; 498 499 /* Variables not in MatMultAdd_SeqAIJ. */ 500 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 501 PetscScalar alpha = 1.0; 502 PetscScalar beta; 503 char matdescra[6]; 504 505 PetscFunctionBegin; 506 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 507 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 508 509 PetscCall(VecGetArrayRead(xx, &x)); 510 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 511 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 512 aa = a->a; /* Nonzero elements stored row-by-row. */ 513 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 514 515 /* Call MKL sparse BLAS routine to do the MatMult. */ 516 if (zz == yy) { 517 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 518 beta = 1.0; 519 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 520 } else { 521 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 522 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 523 beta = 0.0; 524 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 525 for (i = 0; i < m; i++) z[i] += y[i]; 526 } 527 528 PetscCall(PetscLogFlops(2.0 * a->nz)); 529 PetscCall(VecRestoreArrayRead(xx, &x)); 530 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 531 PetscFunctionReturn(0); 532 } 533 #endif 534 535 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 536 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 537 { 538 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 539 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 540 const PetscScalar *x; 541 PetscScalar *y, *z; 542 PetscInt m = A->rmap->n; 543 PetscInt i; 544 545 /* Variables not in MatMultAdd_SeqAIJ. */ 546 PetscObjectState state; 547 548 PetscFunctionBegin; 549 550 /* If there are no nonzero entries, set zz = yy and return immediately. */ 551 if (!a->nz) { 552 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 553 PetscCall(PetscArraycpy(z, y, m)); 554 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 555 PetscFunctionReturn(0); 556 } 557 558 PetscCall(VecGetArrayRead(xx, &x)); 559 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 560 561 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 562 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 563 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 564 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 565 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 566 567 /* Call MKL sparse BLAS routine to do the MatMult. */ 568 if (zz == yy) { 569 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 570 * with alpha and beta both set to 1.0. */ 571 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 572 } else { 573 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 574 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 575 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 576 for (i = 0; i < m; i++) z[i] += y[i]; 577 } 578 579 PetscCall(PetscLogFlops(2.0 * a->nz)); 580 PetscCall(VecRestoreArrayRead(xx, &x)); 581 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 582 PetscFunctionReturn(0); 583 } 584 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 585 586 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 587 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 588 { 589 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 590 const PetscScalar *x; 591 PetscScalar *y, *z; 592 const MatScalar *aa; 593 PetscInt m = A->rmap->n; 594 PetscInt n = A->cmap->n; 595 const PetscInt *aj, *ai; 596 PetscInt i; 597 598 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 599 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 600 PetscScalar alpha = 1.0; 601 PetscScalar beta; 602 char matdescra[6]; 603 604 PetscFunctionBegin; 605 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 606 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 607 608 PetscCall(VecGetArrayRead(xx, &x)); 609 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 610 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 611 aa = a->a; /* Nonzero elements stored row-by-row. */ 612 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 613 614 /* Call MKL sparse BLAS routine to do the MatMult. */ 615 if (zz == yy) { 616 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 617 beta = 1.0; 618 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 619 } else { 620 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 621 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 622 beta = 0.0; 623 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 624 for (i = 0; i < n; i++) z[i] += y[i]; 625 } 626 627 PetscCall(PetscLogFlops(2.0 * a->nz)); 628 PetscCall(VecRestoreArrayRead(xx, &x)); 629 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 630 PetscFunctionReturn(0); 631 } 632 #endif 633 634 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 635 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 636 { 637 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 638 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 639 const PetscScalar *x; 640 PetscScalar *y, *z; 641 PetscInt n = A->cmap->n; 642 PetscInt i; 643 PetscObjectState state; 644 645 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 646 647 PetscFunctionBegin; 648 649 /* If there are no nonzero entries, set zz = yy and return immediately. */ 650 if (!a->nz) { 651 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 652 PetscCall(PetscArraycpy(z, y, n)); 653 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 654 PetscFunctionReturn(0); 655 } 656 657 PetscCall(VecGetArrayRead(xx, &x)); 658 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 659 660 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 661 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 662 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 663 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 664 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 665 666 /* Call MKL sparse BLAS routine to do the MatMult. */ 667 if (zz == yy) { 668 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 669 * with alpha and beta both set to 1.0. */ 670 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 671 } else { 672 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 673 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 674 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 675 for (i = 0; i < n; i++) z[i] += y[i]; 676 } 677 678 PetscCall(PetscLogFlops(2.0 * a->nz)); 679 PetscCall(VecRestoreArrayRead(xx, &x)); 680 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 681 PetscFunctionReturn(0); 682 } 683 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 684 685 /* -------------------------- MatProduct code -------------------------- */ 686 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 687 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 688 { 689 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 690 sparse_matrix_t csrA, csrB, csrC; 691 PetscInt nrows, ncols; 692 struct matrix_descr descr_type_gen; 693 PetscObjectState state; 694 695 PetscFunctionBegin; 696 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 697 * not handle sparse matrices with zero rows or columns. */ 698 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 699 else nrows = A->cmap->N; 700 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 701 else ncols = B->rmap->N; 702 703 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 704 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 705 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 706 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 707 csrA = a->csrA; 708 csrB = b->csrA; 709 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 710 711 if (csrA && csrB) { 712 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 713 } else { 714 csrC = PETSC_NULL; 715 } 716 717 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 718 719 PetscFunctionReturn(0); 720 } 721 722 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 723 { 724 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 725 sparse_matrix_t csrA, csrB, csrC; 726 struct matrix_descr descr_type_gen; 727 PetscObjectState state; 728 729 PetscFunctionBegin; 730 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 731 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 732 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 733 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 734 csrA = a->csrA; 735 csrB = b->csrA; 736 csrC = c->csrA; 737 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 738 739 if (csrA && csrB) { 740 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 741 } else { 742 csrC = PETSC_NULL; 743 } 744 745 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 746 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 747 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 752 { 753 PetscFunctionBegin; 754 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 755 PetscFunctionReturn(0); 756 } 757 758 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 759 { 760 PetscFunctionBegin; 761 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 766 { 767 PetscFunctionBegin; 768 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 773 { 774 PetscFunctionBegin; 775 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 776 PetscFunctionReturn(0); 777 } 778 779 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 780 { 781 PetscFunctionBegin; 782 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 783 PetscFunctionReturn(0); 784 } 785 786 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 787 { 788 PetscFunctionBegin; 789 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 790 PetscFunctionReturn(0); 791 } 792 793 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 794 { 795 Mat_Product *product = C->product; 796 Mat A = product->A, B = product->B; 797 798 PetscFunctionBegin; 799 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 800 PetscFunctionReturn(0); 801 } 802 803 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 804 { 805 Mat_Product *product = C->product; 806 Mat A = product->A, B = product->B; 807 PetscReal fill = product->fill; 808 809 PetscFunctionBegin; 810 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 811 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 812 PetscFunctionReturn(0); 813 } 814 815 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) 816 { 817 Mat Ct; 818 Vec zeros; 819 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 820 sparse_matrix_t csrA, csrP, csrC; 821 PetscBool set, flag; 822 struct matrix_descr descr_type_sym; 823 PetscObjectState state; 824 825 PetscFunctionBegin; 826 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 827 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 828 829 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 830 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 831 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 832 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 833 csrA = a->csrA; 834 csrP = p->csrA; 835 csrC = c->csrA; 836 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 837 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 838 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 839 840 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 841 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 842 843 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 844 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 845 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 846 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 847 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 848 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 849 * the full matrix. */ 850 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 851 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 852 PetscCall(MatCreateVecs(C, &zeros, NULL)); 853 PetscCall(VecSetFromOptions(zeros)); 854 PetscCall(VecZeroEntries(zeros)); 855 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 856 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 857 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 858 PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C)); 859 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 860 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 861 PetscCall(VecDestroy(&zeros)); 862 PetscCall(MatDestroy(&Ct)); 863 PetscFunctionReturn(0); 864 } 865 866 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 867 { 868 Mat_Product *product = C->product; 869 Mat A = product->A, P = product->B; 870 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 871 sparse_matrix_t csrA, csrP, csrC; 872 struct matrix_descr descr_type_sym; 873 PetscObjectState state; 874 875 PetscFunctionBegin; 876 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 877 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 878 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 879 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 880 csrA = a->csrA; 881 csrP = p->csrA; 882 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 883 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 884 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 885 886 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 887 if (csrP && csrA) { 888 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 889 } else { 890 csrC = PETSC_NULL; 891 } 892 893 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 894 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 895 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 896 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 897 * is guaranteed. */ 898 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 899 900 C->ops->productnumeric = MatProductNumeric_PtAP; 901 PetscFunctionReturn(0); 902 } 903 904 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 905 { 906 PetscFunctionBegin; 907 C->ops->productsymbolic = MatProductSymbolic_AB; 908 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 909 PetscFunctionReturn(0); 910 } 911 912 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 913 { 914 PetscFunctionBegin; 915 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 916 PetscFunctionReturn(0); 917 } 918 919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 920 { 921 PetscFunctionBegin; 922 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 923 C->ops->productsymbolic = MatProductSymbolic_ABt; 924 PetscFunctionReturn(0); 925 } 926 927 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 928 { 929 Mat_Product *product = C->product; 930 Mat A = product->A; 931 PetscBool set, flag; 932 933 PetscFunctionBegin; 934 if (PetscDefined(USE_COMPLEX)) { 935 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 936 * We do this in several other locations in this file. This works for the time being, but these 937 * routines are considered unsafe and may be removed from the MatProduct code in the future. 938 * TODO: Add proper MATSEQAIJMKL implementations */ 939 C->ops->productsymbolic = NULL; 940 } else { 941 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 942 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 943 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 944 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 945 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 946 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 947 } 948 PetscFunctionReturn(0); 949 } 950 951 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 952 { 953 PetscFunctionBegin; 954 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 955 PetscFunctionReturn(0); 956 } 957 958 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 959 { 960 PetscFunctionBegin; 961 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 962 PetscFunctionReturn(0); 963 } 964 965 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 966 { 967 Mat_Product *product = C->product; 968 969 PetscFunctionBegin; 970 switch (product->type) { 971 case MATPRODUCT_AB: 972 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 973 break; 974 case MATPRODUCT_AtB: 975 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 976 break; 977 case MATPRODUCT_ABt: 978 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 979 break; 980 case MATPRODUCT_PtAP: 981 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 982 break; 983 case MATPRODUCT_RARt: 984 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 985 break; 986 case MATPRODUCT_ABC: 987 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 988 break; 989 default: 990 break; 991 } 992 PetscFunctionReturn(0); 993 } 994 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 995 /* ------------------------ End MatProduct code ------------------------ */ 996 997 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 998 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 999 * routine, but can also be used to convert an assembled SeqAIJ matrix 1000 * into a SeqAIJMKL one. */ 1001 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 1002 { 1003 Mat B = *newmat; 1004 Mat_SeqAIJMKL *aijmkl; 1005 PetscBool set; 1006 PetscBool sametype; 1007 1008 PetscFunctionBegin; 1009 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 1010 1011 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 1012 if (sametype) PetscFunctionReturn(0); 1013 1014 PetscCall(PetscNew(&aijmkl)); 1015 B->spptr = (void *)aijmkl; 1016 1017 /* Set function pointers for methods that we inherit from AIJ but override. 1018 * We also parse some command line options below, since those determine some of the methods we point to. */ 1019 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1020 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1021 B->ops->destroy = MatDestroy_SeqAIJMKL; 1022 1023 aijmkl->sparse_optimized = PETSC_FALSE; 1024 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1025 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1026 #else 1027 aijmkl->no_SpMV2 = PETSC_TRUE; 1028 #endif 1029 aijmkl->eager_inspection = PETSC_FALSE; 1030 1031 /* Parse command line options. */ 1032 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 1033 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 1034 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 1035 PetscOptionsEnd(); 1036 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1037 if (!aijmkl->no_SpMV2) { 1038 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1039 aijmkl->no_SpMV2 = PETSC_TRUE; 1040 } 1041 #endif 1042 1043 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1044 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1045 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1046 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1047 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1048 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1049 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1050 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1051 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1052 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1053 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1054 #if !defined(PETSC_USE_COMPLEX) 1055 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1056 #else 1057 B->ops->ptapnumeric = NULL; 1058 #endif 1059 #endif 1060 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1061 1062 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1063 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1064 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1065 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1066 * versions in which the older interface has not been deprecated, we use the old interface. */ 1067 if (aijmkl->no_SpMV2) { 1068 B->ops->mult = MatMult_SeqAIJMKL; 1069 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1070 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1071 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1072 } 1073 #endif 1074 1075 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 1076 1077 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 1078 *newmat = B; 1079 PetscFunctionReturn(0); 1080 } 1081 1082 /*@C 1083 MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 1084 This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 1085 routines from Intel MKL whenever possible. 1086 If the installed version of MKL supports the "SpMV2" sparse 1087 inspector-executor routines, then those are used by default. 1088 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 1089 (for symmetric A) operations are currently supported. 1090 Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 1091 1092 Collective 1093 1094 Input Parameters: 1095 + comm - MPI communicator, set to `PETSC_COMM_SELF` 1096 . m - number of rows 1097 . n - number of columns 1098 . nz - number of nonzeros per row (same for all rows) 1099 - nnz - array containing the number of nonzeros in the various rows 1100 (possibly different for each row) or NULL 1101 1102 Output Parameter: 1103 . A - the matrix 1104 1105 Options Database Keys: 1106 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1107 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1108 1109 Note: 1110 If nnz is given then nz is ignored 1111 1112 Level: intermediate 1113 1114 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1115 @*/ 1116 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 1117 { 1118 PetscFunctionBegin; 1119 PetscCall(MatCreate(comm, A)); 1120 PetscCall(MatSetSizes(*A, m, n, m, n)); 1121 PetscCall(MatSetType(*A, MATSEQAIJMKL)); 1122 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1127 { 1128 PetscFunctionBegin; 1129 PetscCall(MatSetType(A, MATSEQAIJ)); 1130 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1131 PetscFunctionReturn(0); 1132 } 1133