/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#if defined(PETSC_HAVE_MKL_INTEL_ILP64)
#define MKL_ILP64
#endif
#include <mkl_spblas.h>

/* Extra data hung off of A->spptr for a MATSEQAIJMKL matrix; the AIJ arrays themselves live in A->data. */
typedef struct {
  PetscBool no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;     /* PETSc object state at the time the MKL handle was last built; used to detect stale handles. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

/* Convert a MATSEQAIJMKL matrix back to its base MATSEQAIJ type: restore the SeqAIJ method table,
 * release the MKL-specific data hung off of spptr, and change the type name. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

  if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

  *newmat = B;
  PetscFunctionReturn(0);
}

/* Destroy a MATSEQAIJMKL matrix: release the MKL handle and spptr data, then delegate to MatDestroy_SeqAIJ(). */
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscCall(PetscFree(A->spptr));
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
  /* Note that I don't call MatSetType(). I believe this is because that
   * is only to be called when *building* a matrix. I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs.
   */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  PetscInt       m, n;
  MatScalar     *aa;
  PetscInt      *aj, *ai;

  PetscFunctionBegin;
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m                  = A->rmap->n;
  n                  = A->cmap->n;
  aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
  aa                 = a->a; /* Nonzero elements stored row-by-row. */
  ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix.
     * Note: the handle aliases (does not copy) the AIJ arrays aa/ai/aj. */
    PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  PetscFunctionReturn(0);
#endif
}

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *dummy;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  PetscFunctionBegin;
  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want.
     */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
    PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    /* NULL handle (empty matrix): build an empty AIJ structure below. */
    aj = ai = PETSC_NULL;
    aa      = PETSC_NULL;
  }

  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    PetscCall(MatSetUp(A));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

  /* Hand ownership of the (possibly NULL) MKL handle to the new AIJMKL matrix. */
  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(0);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation).
   */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Print the contents of the MKL sparse matrix handle (row by row) to the viewer; debugging aid. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
{
  PetscInt            i, j, k;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
    PetscFunctionReturn(0);
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  k = 0;
  for (i = 0; i < nrows; i++) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
    nz = ai[i + 1] - ai[i];
    for (j = 0; j < nz; j++) {
      if (aa) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k])));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
      }
      k++;
    }
    PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
  }
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* Duplicate a MATSEQAIJMKL matrix. The duplicate gets a copy of the spptr struct but its
 * sparse_optimized flag is cleared, so its MKL handle is (re)built lazily on first use. */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, op, M));
  aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
  PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  /* NOTE(review): the handle is rebuilt for the source matrix A here, not for the duplicate *M;
   * the duplicate relies on lazy handle creation since its sparse_optimized flag was just cleared.
   * Confirm this asymmetry is intended. */
  if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscFunctionReturn(0);
}

/* Finish assembly of a MATSEQAIJMKL matrix; delegates to MatAssemblyEnd_SeqAIJ() and then
 * optionally performs eager MKL handle creation/optimization. */
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used.
   */
  PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  PetscFunctionReturn(0);
}

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* Compute yy = A*xx using the old (non-inspector-executor) MKL sparse BLAS mkl_xcsrmv(). */
PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Compute yy = A*xx using the MKL SpMV2 inspector-executor interface (mkl_sparse_x_mv). */
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->rmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(0);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMult.
   */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* Compute yy = A^T*xx using the old (non-inspector-executor) MKL sparse BLAS mkl_xcsrmv(). */
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Compute yy = A^T*xx using the MKL SpMV2 inspector-executor interface (mkl_sparse_x_mv). */
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->cmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(0);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose.
   */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* Compute zz = A*xx + yy using the old (non-inspector-executor) MKL sparse BLAS mkl_xcsrmv(). */
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Compute zz = A*xx + yy using the MKL SpMV2 inspector-executor interface (mkl_sparse_x_mv). */
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = A->rmap->n;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, m));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(0);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0.
     */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* Compute zz = A^T*xx + yy using the old (non-inspector-executor) MKL sparse BLAS mkl_xcsrmv(). */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < n; i++) z[i] += y[i]; /* transpose product has A->cmap->n entries */
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Compute zz = A^T*xx + yy using the MKL SpMV2 inspector-executor interface (mkl_sparse_x_mv). */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n;
  PetscInt           i;
  PetscObjectState   state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, n));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(0);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed).
*/ 666 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 667 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 668 669 /* Call MKL sparse BLAS routine to do the MatMult. */ 670 if (zz == yy) { 671 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 672 * with alpha and beta both set to 1.0. */ 673 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 674 } else { 675 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 676 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 677 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 678 for (i = 0; i < n; i++) z[i] += y[i]; 679 } 680 681 PetscCall(PetscLogFlops(2.0 * a->nz)); 682 PetscCall(VecRestoreArrayRead(xx, &x)); 683 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 684 PetscFunctionReturn(0); 685 } 686 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 687 688 /* -------------------------- MatProduct code -------------------------- */ 689 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 690 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 691 { 692 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 693 sparse_matrix_t csrA, csrB, csrC; 694 PetscInt nrows, ncols; 695 struct matrix_descr descr_type_gen; 696 PetscObjectState state; 697 698 PetscFunctionBegin; 699 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 700 * not handle sparse matrices with zero rows or columns. 
   */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  /* Make sure both operands have current, optimized MKL handles before calling mkl_sparse_sp2m(). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA                = a->csrA;
  csrB                = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
  } else {
    /* Either operand is empty (NULL handle), so the product is empty as well. */
    csrC = PETSC_NULL;
  }

  PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));

  PetscFunctionReturn(0);
}

/* Numeric phase of C = op(A)*op(B) via mkl_sparse_sp2m() (SPARSE_STAGE_FINALIZE_MULT); the
 * values are computed into C's existing MKL handle and then copied back into the AIJ arrays. */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA                = a->csrA;
  csrB                = b->csrA;
  csrC                = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));

  PetscFunctionReturn(0);
}

/* Thin wrappers selecting the transpose options for the private symbolic/numeric routines above. */
PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(0);
}

static
PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 797 { 798 Mat_Product *product = C->product; 799 Mat A = product->A, B = product->B; 800 801 PetscFunctionBegin; 802 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 803 PetscFunctionReturn(0); 804 } 805 806 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 807 { 808 Mat_Product *product = C->product; 809 Mat A = product->A, B = product->B; 810 PetscReal fill = product->fill; 811 812 PetscFunctionBegin; 813 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 814 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 815 PetscFunctionReturn(0); 816 } 817 818 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) 819 { 820 Mat Ct; 821 Vec zeros; 822 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 823 sparse_matrix_t csrA, csrP, csrC; 824 PetscBool set, flag; 825 struct matrix_descr descr_type_sym; 826 PetscObjectState state; 827 828 PetscFunctionBegin; 829 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 830 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 831 832 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 833 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 834 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 835 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 836 csrA = a->csrA; 837 csrP = p->csrA; 838 csrC = c->csrA; 839 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 840 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 841 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 842 843 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) 
*/ 844 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 845 846 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 847 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 848 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 849 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 850 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 851 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 852 * the full matrix. */ 853 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 854 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 855 PetscCall(MatCreateVecs(C, &zeros, NULL)); 856 PetscCall(VecSetFromOptions(zeros)); 857 PetscCall(VecZeroEntries(zeros)); 858 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 859 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 860 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. 
*/ 861 PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C)); 862 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 863 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 864 PetscCall(VecDestroy(&zeros)); 865 PetscCall(MatDestroy(&Ct)); 866 PetscFunctionReturn(0); 867 } 868 869 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 870 { 871 Mat_Product *product = C->product; 872 Mat A = product->A, P = product->B; 873 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 874 sparse_matrix_t csrA, csrP, csrC; 875 struct matrix_descr descr_type_sym; 876 PetscObjectState state; 877 878 PetscFunctionBegin; 879 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 880 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 881 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 882 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 883 csrA = a->csrA; 884 csrP = p->csrA; 885 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 886 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 887 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 888 889 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 890 if (csrP && csrA) { 891 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 892 } else { 893 csrC = PETSC_NULL; 894 } 895 896 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 897 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 898 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). 
I believe that 899 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 900 * is guaranteed. */ 901 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 902 903 C->ops->productnumeric = MatProductNumeric_PtAP; 904 PetscFunctionReturn(0); 905 } 906 907 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 908 { 909 PetscFunctionBegin; 910 C->ops->productsymbolic = MatProductSymbolic_AB; 911 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 912 PetscFunctionReturn(0); 913 } 914 915 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 916 { 917 PetscFunctionBegin; 918 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 919 PetscFunctionReturn(0); 920 } 921 922 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 923 { 924 PetscFunctionBegin; 925 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 926 C->ops->productsymbolic = MatProductSymbolic_ABt; 927 PetscFunctionReturn(0); 928 } 929 930 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 931 { 932 Mat_Product *product = C->product; 933 Mat A = product->A; 934 PetscBool set, flag; 935 936 PetscFunctionBegin; 937 if (PetscDefined(USE_COMPLEX)) { 938 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 939 * We do this in several other locations in this file. This works for the time being, but these 940 * routines are considered unsafe and may be removed from the MatProduct code in the future. 941 * TODO: Add proper MATSEQAIJMKL implementations */ 942 C->ops->productsymbolic = NULL; 943 } else { 944 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. 
*/ 945 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 946 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 947 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 948 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 949 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 950 } 951 PetscFunctionReturn(0); 952 } 953 954 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 955 { 956 PetscFunctionBegin; 957 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 958 PetscFunctionReturn(0); 959 } 960 961 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 962 { 963 PetscFunctionBegin; 964 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 965 PetscFunctionReturn(0); 966 } 967 968 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 969 { 970 Mat_Product *product = C->product; 971 972 PetscFunctionBegin; 973 switch (product->type) { 974 case MATPRODUCT_AB: 975 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 976 break; 977 case MATPRODUCT_AtB: 978 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 979 break; 980 case MATPRODUCT_ABt: 981 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 982 break; 983 case MATPRODUCT_PtAP: 984 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 985 break; 986 case MATPRODUCT_RARt: 987 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 988 break; 989 case MATPRODUCT_ABC: 990 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 991 break; 992 default: 993 break; 994 } 995 PetscFunctionReturn(0); 996 } 997 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 998 /* ------------------------ End MatProduct code ------------------------ */ 999 1000 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1001 * SeqAIJMKL matrix. 
This routine is called by the MatCreate_SeqAIJMKL() 1002 * routine, but can also be used to convert an assembled SeqAIJ matrix 1003 * into a SeqAIJMKL one. */ 1004 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 1005 { 1006 Mat B = *newmat; 1007 Mat_SeqAIJMKL *aijmkl; 1008 PetscBool set; 1009 PetscBool sametype; 1010 1011 PetscFunctionBegin; 1012 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 1013 1014 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 1015 if (sametype) PetscFunctionReturn(0); 1016 1017 PetscCall(PetscNew(&aijmkl)); 1018 B->spptr = (void *)aijmkl; 1019 1020 /* Set function pointers for methods that we inherit from AIJ but override. 1021 * We also parse some command line options below, since those determine some of the methods we point to. */ 1022 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1023 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1024 B->ops->destroy = MatDestroy_SeqAIJMKL; 1025 1026 aijmkl->sparse_optimized = PETSC_FALSE; 1027 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1028 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1029 #else 1030 aijmkl->no_SpMV2 = PETSC_TRUE; 1031 #endif 1032 aijmkl->eager_inspection = PETSC_FALSE; 1033 1034 /* Parse command line options. 
*/ 1035 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 1036 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 1037 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 1038 PetscOptionsEnd(); 1039 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1040 if (!aijmkl->no_SpMV2) { 1041 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1042 aijmkl->no_SpMV2 = PETSC_TRUE; 1043 } 1044 #endif 1045 1046 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1047 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1048 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1049 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1050 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1051 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1052 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1053 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1054 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1055 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1056 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1057 #if !defined(PETSC_USE_COMPLEX) 1058 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1059 #else 1060 B->ops->ptapnumeric = NULL; 1061 #endif 1062 #endif 1063 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1064 1065 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1066 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. 
If "no_SpMV2" has been specified by the 1067 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1068 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1069 * versions in which the older interface has not been deprecated, we use the old interface. */ 1070 if (aijmkl->no_SpMV2) { 1071 B->ops->mult = MatMult_SeqAIJMKL; 1072 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1073 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1074 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1075 } 1076 #endif 1077 1078 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 1079 1080 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 1081 *newmat = B; 1082 PetscFunctionReturn(0); 1083 } 1084 1085 /*@C 1086 MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 1087 This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 1088 routines from Intel MKL whenever possible. 1089 If the installed version of MKL supports the "SpMV2" sparse 1090 inspector-executor routines, then those are used by default. 1091 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 1092 (for symmetric A) operations are currently supported. 1093 Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 1094 1095 Collective 1096 1097 Input Parameters: 1098 + comm - MPI communicator, set to `PETSC_COMM_SELF` 1099 . m - number of rows 1100 . n - number of columns 1101 . nz - number of nonzeros per row (same for all rows) 1102 - nnz - array containing the number of nonzeros in the various rows 1103 (possibly different for each row) or NULL 1104 1105 Output Parameter: 1106 . 
A - the matrix 1107 1108 Options Database Keys: 1109 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1110 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1111 1112 Note: 1113 If nnz is given then nz is ignored 1114 1115 Level: intermediate 1116 1117 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1118 @*/ 1119 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 1120 { 1121 PetscFunctionBegin; 1122 PetscCall(MatCreate(comm, A)); 1123 PetscCall(MatSetSizes(*A, m, n, m, n)); 1124 PetscCall(MatSetType(*A, MATSEQAIJMKL)); 1125 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1130 { 1131 PetscFunctionBegin; 1132 PetscCall(MatSetType(A, MATSEQAIJ)); 1133 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1134 PetscFunctionReturn(0); 1135 } 1136