/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#if defined(PETSC_HAVE_MKL_INTEL_ILP64)
  #define MKL_ILP64
#endif
#include <mkl_spblas.h>

/* Per-matrix data hung off Mat.spptr; tracks whether an MKL inspector-executor
 * handle exists and whether it is in sync with the PETSc AIJ arrays. */
typedef struct {
  PetscBool       no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool       eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool       sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;           /* PetscObjectState of A at the time the handle was built; used to detect staleness. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;  /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Matrix description (type/fill/diag) passed alongside csrA to MKL executors. */
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

/* Convert a MATSEQAIJMKL matrix back to plain MATSEQAIJ: restore the SeqAIJ
 * method table, destroy the MKL handle (if any), and free the spptr data. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

  if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy a MATSEQAIJMKL matrix: release the MKL handle and spptr, then fall
 * through to MatDestroy_SeqAIJ() for the underlying AIJ storage. */
static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscCall(PetscFree(A->spptr));
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
  /* I don't call MatSetType(). I believe this is because that
   * is only to be called when *building* a matrix. I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  PetscInt       m, n;
  MatScalar     *aa;
  PetscInt      *aj, *ai;

  PetscFunctionBegin;
  #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
  #endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix.
     * Note the 4-array CSR variant: row starts are ai, row ends are ai+1 (shifted view of
     * the same PETSc row-pointer array). */
    PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  } else {
    aijmkl->csrA = NULL;
  }

  PetscFunctionReturn(PETSC_SUCCESS);
#endif
}

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *dummy;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  PetscFunctionBegin;
  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
    PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = NULL;
    aa      = NULL;
  }

  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    PetscCall(MatSetUp(A));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

  /* Adopt the caller's handle; ownership transfers to A (destroyed in MatDestroy_SeqAIJMKL). */
  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* Record the post-assembly state so later MatMult calls don't needlessly rebuild the handle. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Print the CSR contents of the MKL sparse matrix handle to an ASCII viewer (debug aid). */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
{
  PetscInt            i, j, k;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  k = 0; /* Running index into the flat aj/aa arrays across all rows. */
  for (i = 0; i < nrows; i++) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
    nz = ai[i + 1] - ai[i];
    for (j = 0; j < nz; j++) {
      if (aa) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
      }
      k++;
    }
    PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* Duplicate a MATSEQAIJMKL matrix: duplicate the AIJ part, shallow-copy the
 * Mat_SeqAIJMKL options, and mark the copy's MKL handle as not yet built. */
static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, op, M));
  aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
  PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  /* NOTE(review): this builds the handle for the SOURCE matrix A, not for the
   * duplicate *M whose sparse_optimized flag was just cleared -- confirm this
   * is intentional (a stale handle on *M is rebuilt lazily at first MatMult). */
  if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Assembly-end hook: run the SeqAIJ assembly (with inodes disabled) and,
 * if eager inspection was requested, build/optimize the MKL handle now. */
static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
  PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* y = A*x using the legacy (pre-inspector-executor) MKL mkl_?csrmv routine. */
static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  PetscFunctionBegin;
  /* Only entries 0 and 3 of matdescra are set; presumably MKL consults no
   * others for a general ('g') matrix -- see the mkl_?csrmv documentation. */
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* y = A*x using the MKL SpMV2 inspector-executor interface (mkl_sparse_?_mv). */
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->rmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* y = A^T*x using the legacy MKL mkl_?csrmv routine (transa = 't'). */
static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* y = A^T*x using the MKL SpMV2 inspector-executor interface. */
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->cmap->n)); /* Result of A^T*x has length ncols. */
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* z = y + A*x using the legacy MKL mkl_?csrmv routine. */
static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* z = y + A*x using the MKL SpMV2 inspector-executor interface. */
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = A->rmap->n;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, m));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* z = y + A^T*x using the legacy MKL mkl_?csrmv routine (transa = 't'). */
static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < n; i++) z[i] += y[i]; /* Transpose product: result has length n (ncols). */
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* z = y + A^T*x using the MKL SpMV2 inspector-executor interface. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n;
  PetscInt           i;
  PetscObjectState   state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, n));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Symbolic phase of C = op(A)*op(B) via mkl_sparse_sp2m(): computes the
 * nonzero structure only (SPARSE_STAGE_FULL_MULT_NO_VAL) and sets up C from it. */
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscInt            nrows, ncols;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  /* Rebuild stale or missing MKL handles for both operands before calling sp2m. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA                = a->csrA;
  csrB                = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
  } else {
    csrC = NULL; /* Either operand empty => product handle stays NULL (empty C). */
  }

  PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = op(A)*op(B): fills in values via SPARSE_STAGE_FINALIZE_MULT
 * on C's existing handle, then copies them back into C's PETSc AIJ arrays. */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA                = a->csrA;
  csrB                = b->csrA;
  csrC                = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
  } else {
    csrC = NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* The wrappers below bind the transpose flags for the standard product variants. */

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct (A^T * B) numeric driver: unpacks the product operands from C. */
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct (A^T * B) symbolic driver: does the symbolic phase and installs
 * the matching numeric routine on C. */
static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;
  PetscReal    fill = product->fill;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = P^T*A*P for a matrix A known to be symmetric (real case).
 * Uses MKL's symmetric (upper-triangle) description for A. */
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(MatIsSymmetricKnown(A, &set, &flag));
  PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA                = a->csrA;
  csrP                = p->csrA;
  csrC                = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* the call below won't work for complex matrices.
     (We protect this when pointers are assigned in MatConvert.) */
  /* NOTE(review): unlike the symbolic PtAP routine below, there is no NULL guard on csrP/csrA here --
   * presumably the symbolic phase has already handled the empty case; confirm before relying on it. */
  PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
  PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
  PetscCall(MatCreateVecs(C, &zeros, NULL));
  PetscCall(VecSetFromOptions(zeros));
  PetscCall(VecZeroEntries(zeros));
  /* Zero the diagonal of Ct so the diagonal entries are not added twice by the MatAXPY() below. */
  PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
  PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  PetscCall(MatProductCreateWithMat(A, P, NULL, C));
  PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
  PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
  PetscCall(VecDestroy(&zeros));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = P^T*A*P for real symmetric A: computes the structure of C via
 * mkl_sparse_sypr(..., SPARSE_STAGE_FULL_MULT_NO_VAL) and installs the generic PtAP numeric hook. */
PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product        *product = C->product;
  Mat                 A = product->A, P = product->B;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  /* Refresh the MKL handles for A and P if either matrix has changed since its handle was built. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
  } else {
    csrC = NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));

  C->ops->productnumeric = MatProductNumeric_PtAP;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct hook for C = A*B: use the generic symbolic driver with our MKL-backed matmatmult. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct hook for C = A^T*B: use the MKL-backed AtB symbolic routine directly. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct hook for C = A*B^T: symbolic phase falls back to the plain SeqAIJ implementation;
 * the numeric phase still uses the MKL routine installed in MatConvert. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct hook for C = P^T*A*P: MKL path only when A is real and known symmetric. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A;
  PetscBool    set, flag;

  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    PetscCall(MatIsSymmetricKnown(A, &set, &flag));
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct hook for C = R*A*R^T: no MKL implementation; fall back to the unsafe generic path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatProduct hook for C = A*B*C triple products: no MKL implementation; fall back to the unsafe generic path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatcher: route each MatProduct type to its AIJMKL-specific setup routine above.
 * Unknown types fall through untouched so the generic framework handles them. */
PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
    break;
  case MATPRODUCT_RARt:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
    break;
  case MATPRODUCT_ABC:
    PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.
This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Nothing to do if A already has the requested type. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
  if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

  /* Allocate the AIJMKL-specific data (MKL handle state) and hang it off the matrix. */
  PetscCall(PetscNew(&aijmkl));
  B->spptr = (void *)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
  PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* The user asked for SpMV2 routines but this MKL build cannot provide them; warn and disable. */
  if (!aijmkl->no_SpMV2) {
    PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"));
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Install the inspector-executor (SpMV2) kernels. */
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
    #if !defined(PETSC_USE_COMPLEX)
  /* mkl_sparse_sypr()-based PtAP only works for real scalars; see the PtAP routines above. */
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    #else
  B->ops->ptapnumeric = NULL;
    #endif
  #endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  /* Register the back-conversion so MatConvert() can turn this matrix into a plain SeqAIJ again. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.

  Collective

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. m    - number of rows
. n    - number of columns
. nz   - number of nonzeros per row (same for all rows)
- nnz  - array containing the number of nonzeros in the various rows
         (possibly different for each row) or `NULL`

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
- -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
                                 performing this step the first time the matrix is applied

  Level: intermediate

  Notes:
  If `nnz` is given then `nz` is ignored

  This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
  routines from Intel MKL whenever possible.

  If the installed version of MKL supports the "SpMV2" sparse
  inspector-executor routines, then those are used by default.

  `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
  (for symmetric A) operations are currently supported.
  MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  /* Standard creation sequence: create, size, set type, then preallocate with the SeqAIJ routine. */
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQAIJMKL));
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-registration constructor for MATSEQAIJMKL: build a plain SeqAIJ matrix, then convert it
 * in place to AIJMKL via MatConvert_SeqAIJ_SeqAIJMKL(). */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}