1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 12 #define MKL_ILP64 13 #endif 14 #include <mkl_spblas.h> 15 16 typedef struct { 17 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 18 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 19 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 20 PetscObjectState state; 21 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 22 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 23 struct matrix_descr descr; 24 #endif 25 } Mat_SeqAIJMKL; 26 27 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 28 29 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 30 { 31 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 32 /* so we will ignore 'MatType type'. */ 33 Mat B = *newmat; 34 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 35 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 36 #endif 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 40 41 /* Reset the original function pointers. */ 42 B->ops->duplicate = MatDuplicate_SeqAIJ; 43 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 44 B->ops->destroy = MatDestroy_SeqAIJ; 45 B->ops->mult = MatMult_SeqAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 47 B->ops->multadd = MatMultAdd_SeqAIJ; 48 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 49 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 50 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 51 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 55 56 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 57 58 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 59 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 60 * simply involves destroying the MKL sparse matrix handle and then freeing 61 * the spptr pointer. */ 62 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 63 64 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 66 PetscCall(PetscFree(B->spptr)); 67 68 /* Change the type of B to MATSEQAIJ. */ 69 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 70 71 *newmat = B; 72 PetscFunctionReturn(PETSC_SUCCESS); 73 } 74 75 static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 76 { 77 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 78 79 PetscFunctionBegin; 80 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 81 if (aijmkl) { 82 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 83 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 84 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 85 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 86 PetscCall(PetscFree(A->spptr)); 87 } 88 89 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 90 * to destroy everything that remains. */ 91 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 92 /* I don't call MatSetType(). I believe this is because that 93 * is only to be called when *building* a matrix. I could be wrong, but 94 * that is how things work for the SuperLU matrix class. */ 95 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 96 PetscCall(MatDestroy_SeqAIJ(A)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 101 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 102 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 103 * handle, creates a new one, and then calls mkl_sparse_optimize(). 104 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 105 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 106 * an unoptimized matrix handle here. */ 107 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 108 { 109 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 110 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 111 * does nothing. We make it callable anyway in this case because it cuts 112 * down on littering the code with #ifdefs. */ 113 PetscFunctionBegin; 114 PetscFunctionReturn(PETSC_SUCCESS); 115 #else 116 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 117 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 118 PetscInt m, n; 119 MatScalar *aa; 120 PetscInt *aj, *ai; 121 122 PetscFunctionBegin; 123 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 124 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 125 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 126 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 127 * mkl_sparse_optimize() later. */ 128 if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS); 129 #endif 130 131 if (aijmkl->sparse_optimized) { 132 /* Matrix has been previously assembled and optimized. Must destroy old 133 * matrix handle before running the optimization step again. */ 134 PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 135 } 136 aijmkl->sparse_optimized = PETSC_FALSE; 137 138 /* Now perform the SpMV2 setup and matrix optimization. */ 139 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 140 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 141 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 142 m = A->rmap->n; 143 n = A->cmap->n; 144 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 145 aa = a->a; /* Nonzero elements stored row-by-row. */ 146 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 147 if (a->nz && aa && !A->structure_only) { 148 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 149 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 150 PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa); 151 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 152 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 153 if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 154 aijmkl->sparse_optimized = PETSC_TRUE; 155 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 156 } else { 157 aijmkl->csrA = NULL; 158 } 159 PetscFunctionReturn(PETSC_SUCCESS); 160 #endif 161 } 162 163 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 164 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 165 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) 166 { 167 sparse_index_base_t indexing; 168 PetscInt m, n; 169 PetscInt *aj, *ai, *dummy; 170 MatScalar *aa; 171 Mat_SeqAIJMKL *aijmkl; 172 173 PetscFunctionBegin; 174 if (csrA) { 175 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 176 PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 177 PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 178 } else { 179 aj = ai = NULL; 180 aa = NULL; 181 } 182 183 PetscCall(MatSetType(A, MATSEQAIJ)); 184 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 185 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 186 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 187 * they will be destroyed when the MKL handle is destroyed. 188 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 189 if (csrA) { 190 PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 191 } else { 192 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 193 PetscCall(MatSetUp(A)); 194 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 195 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 196 } 197 198 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 199 * Now turn it into a MATSEQAIJMKL. */ 200 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 201 202 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 203 aijmkl->csrA = csrA; 204 205 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 206 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 207 * and just need to be able to run the MKL optimization step. */ 208 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 209 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 210 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 211 if (csrA) { 212 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 213 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 214 } 215 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 216 PetscFunctionReturn(PETSC_SUCCESS); 217 } 218 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 219 220 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 221 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 222 * MatMatMultNumeric(). */ 223 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 224 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 225 { 226 PetscInt i; 227 PetscInt nrows, ncols; 228 PetscInt nz; 229 PetscInt *ai, *aj, *dummy; 230 PetscScalar *aa; 231 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 232 sparse_index_base_t indexing; 233 234 PetscFunctionBegin; 235 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 236 if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS); 237 238 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 239 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 240 241 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 242 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 243 for (i = 0; i < nrows; i++) { 244 nz = ai[i + 1] - ai[i]; 245 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 246 } 247 248 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 249 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 250 251 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 252 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 253 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 254 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 255 aijmkl->sparse_optimized = PETSC_FALSE; 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 259 260 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 261 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) 262 { 263 PetscInt i, j, k; 264 PetscInt nrows, ncols; 265 PetscInt nz; 266 PetscInt *ai, *aj, *dummy; 267 PetscScalar *aa; 268 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 269 sparse_index_base_t indexing; 270 271 PetscFunctionBegin; 272 PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 273 274 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 275 if (!aijmkl->csrA) { 276 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 281 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 282 283 k = 0; 284 for (i = 0; i < nrows; i++) { 285 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 286 nz = ai[i + 1] - ai[i]; 287 for (j = 0; j < nz; j++) { 288 if (aa) { 289 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 290 } else { 291 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 292 } 293 k++; 294 } 295 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 296 } 297 PetscFunctionReturn(PETSC_SUCCESS); 298 } 299 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 300 301 static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 302 { 303 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 304 Mat_SeqAIJMKL *aijmkl_dest; 305 306 PetscFunctionBegin; 307 PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 308 aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 309 PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 310 aijmkl_dest->sparse_optimized = PETSC_FALSE; 311 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 312 PetscFunctionReturn(PETSC_SUCCESS); 313 } 314 315 static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 316 { 317 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 318 Mat_SeqAIJMKL *aijmkl; 319 320 PetscFunctionBegin; 321 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 322 323 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 324 * extra information and some different methods, call the AssemblyEnd 325 * routine for a MATSEQAIJ. 326 * I'm not sure if this is the best way to do this, but it avoids 327 * a lot of code duplication. */ 328 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 329 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 330 331 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 332 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 333 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 334 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 339 static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) 340 { 341 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 342 const PetscScalar *x; 343 PetscScalar *y; 344 const MatScalar *aa; 345 PetscInt m = A->rmap->n; 346 PetscInt n = A->cmap->n; 347 PetscScalar alpha = 1.0; 348 PetscScalar beta = 0.0; 349 const PetscInt *aj, *ai; 350 char matdescra[6]; 351 352 /* Variables not in MatMult_SeqAIJ. */ 353 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 354 355 PetscFunctionBegin; 356 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 357 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 358 PetscCall(VecGetArrayRead(xx, &x)); 359 PetscCall(VecGetArray(yy, &y)); 360 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 361 aa = a->a; /* Nonzero elements stored row-by-row. */ 362 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 363 364 /* Call MKL sparse BLAS routine to do the MatMult. */ 365 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 366 367 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 368 PetscCall(VecRestoreArrayRead(xx, &x)); 369 PetscCall(VecRestoreArray(yy, &y)); 370 PetscFunctionReturn(PETSC_SUCCESS); 371 } 372 #endif 373 374 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 375 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 376 { 377 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 378 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 379 const PetscScalar *x; 380 PetscScalar *y; 381 PetscObjectState state; 382 383 PetscFunctionBegin; 384 /* If there are no nonzero entries, zero yy and return immediately. */ 385 if (!a->nz) { 386 PetscCall(VecGetArray(yy, &y)); 387 PetscCall(PetscArrayzero(y, A->rmap->n)); 388 PetscCall(VecRestoreArray(yy, &y)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 PetscCall(VecGetArrayRead(xx, &x)); 393 PetscCall(VecGetArray(yy, &y)); 394 395 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 396 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 397 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 398 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 399 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 400 401 /* Call MKL SpMV2 executor routine to do the MatMult. */ 402 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 403 404 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 405 PetscCall(VecRestoreArrayRead(xx, &x)); 406 PetscCall(VecRestoreArray(yy, &y)); 407 PetscFunctionReturn(PETSC_SUCCESS); 408 } 409 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 410 411 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 412 static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) 413 { 414 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 415 const PetscScalar *x; 416 PetscScalar *y; 417 const MatScalar *aa; 418 PetscInt m = A->rmap->n; 419 PetscInt n = A->cmap->n; 420 PetscScalar alpha = 1.0; 421 PetscScalar beta = 0.0; 422 const PetscInt *aj, *ai; 423 char matdescra[6]; 424 425 /* Variables not in MatMultTranspose_SeqAIJ. */ 426 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 427 428 PetscFunctionBegin; 429 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 430 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 431 PetscCall(VecGetArrayRead(xx, &x)); 432 PetscCall(VecGetArray(yy, &y)); 433 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 434 aa = a->a; /* Nonzero elements stored row-by-row. */ 435 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 436 437 /* Call MKL sparse BLAS routine to do the MatMult. */ 438 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 439 440 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 441 PetscCall(VecRestoreArrayRead(xx, &x)); 442 PetscCall(VecRestoreArray(yy, &y)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 #endif 446 447 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 448 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 449 { 450 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 451 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 452 const PetscScalar *x; 453 PetscScalar *y; 454 PetscObjectState state; 455 456 PetscFunctionBegin; 457 /* If there are no nonzero entries, zero yy and return immediately. */ 458 if (!a->nz) { 459 PetscCall(VecGetArray(yy, &y)); 460 PetscCall(PetscArrayzero(y, A->cmap->n)); 461 PetscCall(VecRestoreArray(yy, &y)); 462 PetscFunctionReturn(PETSC_SUCCESS); 463 } 464 465 PetscCall(VecGetArrayRead(xx, &x)); 466 PetscCall(VecGetArray(yy, &y)); 467 468 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 469 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 470 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 471 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 472 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 473 474 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 475 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 476 477 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 478 PetscCall(VecRestoreArrayRead(xx, &x)); 479 PetscCall(VecRestoreArray(yy, &y)); 480 PetscFunctionReturn(PETSC_SUCCESS); 481 } 482 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 483 484 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 485 static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 486 { 487 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 488 const PetscScalar *x; 489 PetscScalar *y, *z; 490 const MatScalar *aa; 491 PetscInt m = A->rmap->n; 492 PetscInt n = A->cmap->n; 493 const PetscInt *aj, *ai; 494 PetscInt i; 495 496 /* Variables not in MatMultAdd_SeqAIJ. */ 497 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 498 PetscScalar alpha = 1.0; 499 PetscScalar beta; 500 char matdescra[6]; 501 502 PetscFunctionBegin; 503 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 504 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 505 506 PetscCall(VecGetArrayRead(xx, &x)); 507 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 508 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 509 aa = a->a; /* Nonzero elements stored row-by-row. */ 510 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 511 512 /* Call MKL sparse BLAS routine to do the MatMult. */ 513 if (zz == yy) { 514 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 515 beta = 1.0; 516 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 517 } else { 518 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 519 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 520 beta = 0.0; 521 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 522 for (i = 0; i < m; i++) z[i] += y[i]; 523 } 524 525 PetscCall(PetscLogFlops(2.0 * a->nz)); 526 PetscCall(VecRestoreArrayRead(xx, &x)); 527 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 #endif 531 532 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 533 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 534 { 535 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 536 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 537 const PetscScalar *x; 538 PetscScalar *y, *z; 539 PetscInt m = A->rmap->n; 540 PetscInt i; 541 542 /* Variables not in MatMultAdd_SeqAIJ. */ 543 PetscObjectState state; 544 545 PetscFunctionBegin; 546 /* If there are no nonzero entries, set zz = yy and return immediately. */ 547 if (!a->nz) { 548 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 549 PetscCall(PetscArraycpy(z, y, m)); 550 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 551 PetscFunctionReturn(PETSC_SUCCESS); 552 } 553 554 PetscCall(VecGetArrayRead(xx, &x)); 555 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 556 557 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 558 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 559 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 560 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 561 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 562 563 /* Call MKL sparse BLAS routine to do the MatMult. */ 564 if (zz == yy) { 565 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 566 * with alpha and beta both set to 1.0. */ 567 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 568 } else { 569 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 570 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 571 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 572 for (i = 0; i < m; i++) z[i] += y[i]; 573 } 574 575 PetscCall(PetscLogFlops(2.0 * a->nz)); 576 PetscCall(VecRestoreArrayRead(xx, &x)); 577 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 578 PetscFunctionReturn(PETSC_SUCCESS); 579 } 580 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 581 582 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 583 static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 584 { 585 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 586 const PetscScalar *x; 587 PetscScalar *y, *z; 588 const MatScalar *aa; 589 PetscInt m = A->rmap->n; 590 PetscInt n = A->cmap->n; 591 const PetscInt *aj, *ai; 592 PetscInt i; 593 594 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 595 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 596 PetscScalar alpha = 1.0; 597 PetscScalar beta; 598 char matdescra[6]; 599 600 PetscFunctionBegin; 601 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 602 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 603 604 PetscCall(VecGetArrayRead(xx, &x)); 605 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 606 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 607 aa = a->a; /* Nonzero elements stored row-by-row. */ 608 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 609 610 /* Call MKL sparse BLAS routine to do the MatMult. */ 611 if (zz == yy) { 612 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 613 beta = 1.0; 614 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 615 } else { 616 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 617 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 618 beta = 0.0; 619 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 620 for (i = 0; i < n; i++) z[i] += y[i]; 621 } 622 623 PetscCall(PetscLogFlops(2.0 * a->nz)); 624 PetscCall(VecRestoreArrayRead(xx, &x)); 625 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 #endif 629 630 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 631 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 632 { 633 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 634 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 635 const PetscScalar *x; 636 PetscScalar *y, *z; 637 PetscInt n = A->cmap->n; 638 PetscInt i; 639 PetscObjectState state; 640 641 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 642 643 PetscFunctionBegin; 644 /* If there are no nonzero entries, set zz = yy and return immediately. */ 645 if (!a->nz) { 646 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 647 PetscCall(PetscArraycpy(z, y, n)); 648 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 PetscCall(VecGetArrayRead(xx, &x)); 653 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 654 655 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 656 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 657 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 658 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 659 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 660 661 /* Call MKL sparse BLAS routine to do the MatMult. */ 662 if (zz == yy) { 663 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 664 * with alpha and beta both set to 1.0. */ 665 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 666 } else { 667 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 668 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 669 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 670 for (i = 0; i < n; i++) z[i] += y[i]; 671 } 672 673 PetscCall(PetscLogFlops(2.0 * a->nz)); 674 PetscCall(VecRestoreArrayRead(xx, &x)); 675 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 676 PetscFunctionReturn(PETSC_SUCCESS); 677 } 678 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 679 680 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 681 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 682 { 683 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 684 sparse_matrix_t csrA, csrB, csrC; 685 PetscInt nrows, ncols; 686 struct matrix_descr descr_type_gen; 687 PetscObjectState state; 688 689 PetscFunctionBegin; 690 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 691 * not handle sparse matrices with zero rows or columns. */ 692 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 693 else nrows = A->cmap->N; 694 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 695 else ncols = B->rmap->N; 696 697 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 698 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 699 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 700 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 701 csrA = a->csrA; 702 csrB = b->csrA; 703 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 704 705 if (csrA && csrB) { 706 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 707 } else { 708 csrC = NULL; 709 } 710 711 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 712 PetscFunctionReturn(PETSC_SUCCESS); 713 } 714 715 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 716 { 717 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 718 sparse_matrix_t csrA, csrB, csrC; 719 struct matrix_descr descr_type_gen; 720 PetscObjectState state; 721 722 PetscFunctionBegin; 723 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 724 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 725 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 726 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 727 csrA = a->csrA; 728 csrB = b->csrA; 729 csrC = c->csrA; 730 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 731 732 if (csrA && csrB) { 733 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 734 } else { 735 csrC = NULL; 736 } 737 738 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 739 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 740 PetscFunctionReturn(PETSC_SUCCESS); 741 } 742 743 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 744 { 745 PetscFunctionBegin; 746 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 751 { 752 PetscFunctionBegin; 753 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 754 PetscFunctionReturn(PETSC_SUCCESS); 755 } 756 757 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 758 { 759 PetscFunctionBegin; 760 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 761 PetscFunctionReturn(PETSC_SUCCESS); 762 } 763 764 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 765 { 766 PetscFunctionBegin; 767 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 768 PetscFunctionReturn(PETSC_SUCCESS); 769 } 770 771 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 772 { 773 PetscFunctionBegin; 774 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 779 { 780 PetscFunctionBegin; 781 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 782 PetscFunctionReturn(PETSC_SUCCESS); 783 } 784 785 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 786 { 787 Mat_Product *product = C->product; 788 Mat A = product->A, B = product->B; 789 790 PetscFunctionBegin; 791 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 792 PetscFunctionReturn(PETSC_SUCCESS); 793 } 794 795 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 796 { 797 Mat_Product *product = C->product; 798 Mat A = product->A, B = product->B; 799 PetscReal fill = product->fill; 800 801 PetscFunctionBegin; 802 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 803 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 804 PetscFunctionReturn(PETSC_SUCCESS); 805 } 806 807 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) 808 { 809 Mat Ct; 810 Vec zeros; 811 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 812 sparse_matrix_t csrA, csrP, csrC; 813 PetscBool set, flag; 814 struct matrix_descr descr_type_sym; 815 PetscObjectState state; 816 817 PetscFunctionBegin; 818 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 819 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 820 821 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 822 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 823 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 824 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 825 csrA = a->csrA; 826 csrP = p->csrA; 827 csrC = c->csrA; 828 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 829 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 830 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 831 832 /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 833 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 834 835 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 836 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 837 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 838 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 839 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 840 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 841 * the full matrix. */ 842 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 843 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 844 PetscCall(MatCreateVecs(C, &zeros, NULL)); 845 PetscCall(VecSetFromOptions(zeros)); 846 PetscCall(VecZeroEntries(zeros)); 847 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 848 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 849 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 850 PetscCall(MatProductCreateWithMat(A, P, NULL, C)); 851 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 852 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 853 PetscCall(VecDestroy(&zeros)); 854 PetscCall(MatDestroy(&Ct)); 855 PetscFunctionReturn(PETSC_SUCCESS); 856 } 857 858 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 859 { 860 Mat_Product *product = C->product; 861 Mat A = product->A, P = product->B; 862 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 863 sparse_matrix_t csrA, csrP, csrC; 864 struct matrix_descr descr_type_sym; 865 PetscObjectState state; 866 867 PetscFunctionBegin; 868 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 869 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 870 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 871 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 872 csrA = a->csrA; 873 csrP = p->csrA; 874 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 875 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 876 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 877 878 /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 879 if (csrP && csrA) { 880 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 881 } else { 882 csrC = NULL; 883 } 884 885 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 886 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 887 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 888 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 889 * is guaranteed. */ 890 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 891 892 C->ops->productnumeric = MatProductNumeric_PtAP; 893 PetscFunctionReturn(PETSC_SUCCESS); 894 } 895 896 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 897 { 898 PetscFunctionBegin; 899 C->ops->productsymbolic = MatProductSymbolic_AB; 900 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 901 PetscFunctionReturn(PETSC_SUCCESS); 902 } 903 904 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 905 { 906 PetscFunctionBegin; 907 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 908 PetscFunctionReturn(PETSC_SUCCESS); 909 } 910 911 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 912 { 913 PetscFunctionBegin; 914 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 915 C->ops->productsymbolic = MatProductSymbolic_ABt; 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } 918 919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 920 { 921 Mat_Product *product = C->product; 922 Mat A = product->A; 923 PetscBool set, flag; 924 925 PetscFunctionBegin; 926 if (PetscDefined(USE_COMPLEX)) { 927 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 928 * We do this in several other locations in this file. This works for the time being, but these 929 * routines are considered unsafe and may be removed from the MatProduct code in the future. 930 * TODO: Add proper MATSEQAIJMKL implementations */ 931 C->ops->productsymbolic = NULL; 932 } else { 933 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 934 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 935 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 936 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 937 /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 938 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 939 } 940 PetscFunctionReturn(PETSC_SUCCESS); 941 } 942 943 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 944 { 945 PetscFunctionBegin; 946 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 947 PetscFunctionReturn(PETSC_SUCCESS); 948 } 949 950 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 951 { 952 PetscFunctionBegin; 953 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 958 { 959 Mat_Product *product = C->product; 960 961 PetscFunctionBegin; 962 switch (product->type) { 963 case MATPRODUCT_AB: 964 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 965 break; 966 case MATPRODUCT_AtB: 967 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 968 break; 969 case MATPRODUCT_ABt: 970 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 971 break; 972 case MATPRODUCT_PtAP: 973 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 974 break; 975 case MATPRODUCT_RARt: 976 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 977 break; 978 case MATPRODUCT_ABC: 979 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 980 break; 981 default: 982 break; 983 } 984 PetscFunctionReturn(PETSC_SUCCESS); 985 } 986 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 987 988 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 989 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 990 * routine, but can also be used to convert an assembled SeqAIJ matrix 991 * into a SeqAIJMKL one. */ 992 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 993 { 994 Mat B = *newmat; 995 Mat_SeqAIJMKL *aijmkl; 996 PetscBool set; 997 PetscBool sametype; 998 999 PetscFunctionBegin; 1000 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 1001 1002 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 1003 if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 1004 1005 PetscCall(PetscNew(&aijmkl)); 1006 B->spptr = (void *)aijmkl; 1007 1008 /* Set function pointers for methods that we inherit from AIJ but override. 1009 * We also parse some command line options below, since those determine some of the methods we point to. */ 1010 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1011 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1012 B->ops->destroy = MatDestroy_SeqAIJMKL; 1013 1014 aijmkl->sparse_optimized = PETSC_FALSE; 1015 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1016 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1017 #else 1018 aijmkl->no_SpMV2 = PETSC_TRUE; 1019 #endif 1020 aijmkl->eager_inspection = PETSC_FALSE; 1021 1022 /* Parse command line options. */ 1023 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 1024 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 1025 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 1026 PetscOptionsEnd(); 1027 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1028 if (!aijmkl->no_SpMV2) { 1029 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1030 aijmkl->no_SpMV2 = PETSC_TRUE; 1031 } 1032 #endif 1033 1034 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1035 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1036 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1037 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1038 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1039 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1040 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1041 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1042 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1043 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1044 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1045 #if !defined(PETSC_USE_COMPLEX) 1046 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1047 #else 1048 B->ops->ptapnumeric = NULL; 1049 #endif 1050 #endif 1051 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1052 1053 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1054 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1055 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1056 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1057 * versions in which the older interface has not been deprecated, we use the old interface. */ 1058 if (aijmkl->no_SpMV2) { 1059 B->ops->mult = MatMult_SeqAIJMKL; 1060 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1061 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1062 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1063 } 1064 #endif 1065 1066 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 1067 1068 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 1069 *newmat = B; 1070 PetscFunctionReturn(PETSC_SUCCESS); 1071 } 1072 1073 /*@C 1074 MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 1075 1076 Collective 1077 1078 Input Parameters: 1079 + comm - MPI communicator, set to `PETSC_COMM_SELF` 1080 . m - number of rows 1081 . n - number of columns 1082 . nz - number of nonzeros per row (same for all rows) 1083 - nnz - array containing the number of nonzeros in the various rows 1084 (possibly different for each row) or `NULL` 1085 1086 Output Parameter: 1087 . A - the matrix 1088 1089 Options Database Keys: 1090 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1091 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, 1092 performing this step the first time the matrix is applied 1093 1094 Level: intermediate 1095 1096 Notes: 1097 If `nnz` is given then `nz` is ignored 1098 1099 This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 1100 routines from Intel MKL whenever possible. 1101 1102 If the installed version of MKL supports the "SpMV2" sparse 1103 inspector-executor routines, then those are used by default. 1104 1105 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 1106 (for symmetric A) operations are currently supported. 1107 MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 1108 1109 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1110 @*/ 1111 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 1112 { 1113 PetscFunctionBegin; 1114 PetscCall(MatCreate(comm, A)); 1115 PetscCall(MatSetSizes(*A, m, n, m, n)); 1116 PetscCall(MatSetType(*A, MATSEQAIJMKL)); 1117 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 1118 PetscFunctionReturn(PETSC_SUCCESS); 1119 } 1120 1121 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1122 { 1123 PetscFunctionBegin; 1124 PetscCall(MatSetType(A, MATSEQAIJ)); 1125 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128