1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 Mat B = *newmat; 31 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 32 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 33 #endif 34 35 PetscFunctionBegin; 36 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 37 38 /* Reset the original function pointers. */ 39 B->ops->duplicate = MatDuplicate_SeqAIJ; 40 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 41 B->ops->destroy = MatDestroy_SeqAIJ; 42 B->ops->mult = MatMult_SeqAIJ; 43 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 44 B->ops->multadd = MatMultAdd_SeqAIJ; 45 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 46 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 47 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 48 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 49 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 52 53 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 54 55 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 56 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57 * simply involves destroying the MKL sparse matrix handle and then freeing 58 * the spptr pointer. */ 59 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 60 61 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 63 PetscCall(PetscFree(B->spptr)); 64 65 /* Change the type of B to MATSEQAIJ. */ 66 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 67 68 *newmat = B; 69 PetscFunctionReturn(0); 70 } 71 72 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) { 73 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 74 75 PetscFunctionBegin; 76 77 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 78 if (aijmkl) { 79 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 80 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 81 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 83 PetscCall(PetscFree(A->spptr)); 84 } 85 86 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 87 * to destroy everything that remains. */ 88 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 89 /* Note that I don't call MatSetType(). I believe this is because that 90 * is only to be called when *building* a matrix. I could be wrong, but 91 * that is how things work for the SuperLU matrix class. */ 92 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 93 PetscCall(MatDestroy_SeqAIJ(A)); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 98 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 99 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 100 * handle, creates a new one, and then calls mkl_sparse_optimize(). 101 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 102 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 103 * an unoptimized matrix handle here. */ 104 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) { 105 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 106 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 107 * does nothing. We make it callable anyway in this case because it cuts 108 * down on littering the code with #ifdefs. */ 109 PetscFunctionBegin; 110 PetscFunctionReturn(0); 111 #else 112 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 113 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 114 PetscInt m, n; 115 MatScalar *aa; 116 PetscInt *aj, *ai; 117 118 PetscFunctionBegin; 119 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 120 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 121 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 122 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 123 * mkl_sparse_optimize() later. */ 124 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 125 #endif 126 127 if (aijmkl->sparse_optimized) { 128 /* Matrix has been previously assembled and optimized. Must destroy old 129 * matrix handle before running the optimization step again. */ 130 PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 131 } 132 aijmkl->sparse_optimized = PETSC_FALSE; 133 134 /* Now perform the SpMV2 setup and matrix optimization. */ 135 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 136 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 137 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 138 m = A->rmap->n; 139 n = A->cmap->n; 140 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 141 aa = a->a; /* Nonzero elements stored row-by-row. */ 142 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 143 if (a->nz && aa && !A->structure_only) { 144 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 145 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 146 PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa); 147 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 148 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 149 if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 150 aijmkl->sparse_optimized = PETSC_TRUE; 151 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 152 } else { 153 aijmkl->csrA = PETSC_NULL; 154 } 155 156 PetscFunctionReturn(0); 157 #endif 158 } 159 160 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 161 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 162 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) { 163 sparse_index_base_t indexing; 164 PetscInt m, n; 165 PetscInt *aj, *ai, *dummy; 166 MatScalar *aa; 167 Mat_SeqAIJMKL *aijmkl; 168 169 PetscFunctionBegin; 170 if (csrA) { 171 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 172 PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa); 173 PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 174 } else { 175 aj = ai = PETSC_NULL; 176 aa = PETSC_NULL; 177 } 178 179 PetscCall(MatSetType(A, MATSEQAIJ)); 180 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 181 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 182 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 183 * they will be destroyed when the MKL handle is destroyed. 184 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 185 if (csrA) { 186 PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 187 } else { 188 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 189 PetscCall(MatSetUp(A)); 190 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 192 } 193 194 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 195 * Now turn it into a MATSEQAIJMKL. */ 196 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 197 198 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 199 aijmkl->csrA = csrA; 200 201 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 202 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 203 * and just need to be able to run the MKL optimization step. */ 204 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 205 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 206 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 207 if (csrA) { 208 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 209 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 210 } 211 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 212 PetscFunctionReturn(0); 213 } 214 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 215 216 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 217 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 218 * MatMatMultNumeric(). */ 219 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 220 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) { 221 PetscInt i; 222 PetscInt nrows, ncols; 223 PetscInt nz; 224 PetscInt *ai, *aj, *dummy; 225 PetscScalar *aa; 226 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 227 sparse_index_base_t indexing; 228 229 PetscFunctionBegin; 230 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 231 if (!aijmkl->csrA) PetscFunctionReturn(0); 232 233 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 234 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 235 236 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 237 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 238 for (i = 0; i < nrows; i++) { 239 nz = ai[i + 1] - ai[i]; 240 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 241 } 242 243 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 244 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 245 246 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 247 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 248 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 249 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 250 aijmkl->sparse_optimized = PETSC_FALSE; 251 PetscFunctionReturn(0); 252 } 253 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 254 255 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 256 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) { 257 PetscInt i, j, k; 258 PetscInt nrows, ncols; 259 PetscInt nz; 260 PetscInt *ai, *aj, *dummy; 261 PetscScalar *aa; 262 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 263 sparse_index_base_t indexing; 264 265 PetscFunctionBegin; 266 PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 267 268 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 269 if (!aijmkl->csrA) { 270 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 271 PetscFunctionReturn(0); 272 } 273 274 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 275 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 276 277 k = 0; 278 for (i = 0; i < nrows; i++) { 279 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 280 nz = ai[i + 1] - ai[i]; 281 for (j = 0; j < nz; j++) { 282 if (aa) { 283 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 284 } else { 285 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 286 } 287 k++; 288 } 289 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 290 } 291 PetscFunctionReturn(0); 292 } 293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 294 295 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) { 296 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 297 Mat_SeqAIJMKL *aijmkl_dest; 298 299 PetscFunctionBegin; 300 PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 301 aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 302 PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 303 aijmkl_dest->sparse_optimized = PETSC_FALSE; 304 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) { 309 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 310 Mat_SeqAIJMKL *aijmkl; 311 312 PetscFunctionBegin; 313 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 314 315 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 316 * extra information and some different methods, call the AssemblyEnd 317 * routine for a MATSEQAIJ. 318 * I'm not sure if this is the best way to do this, but it avoids 319 * a lot of code duplication. */ 320 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 321 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 322 323 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 324 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 325 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 326 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 327 328 PetscFunctionReturn(0); 329 } 330 331 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 332 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) { 333 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 334 const PetscScalar *x; 335 PetscScalar *y; 336 const MatScalar *aa; 337 PetscInt m = A->rmap->n; 338 PetscInt n = A->cmap->n; 339 PetscScalar alpha = 1.0; 340 PetscScalar beta = 0.0; 341 const PetscInt *aj, *ai; 342 char matdescra[6]; 343 344 /* Variables not in MatMult_SeqAIJ. */ 345 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 346 347 PetscFunctionBegin; 348 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 349 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 350 PetscCall(VecGetArrayRead(xx, &x)); 351 PetscCall(VecGetArray(yy, &y)); 352 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 353 aa = a->a; /* Nonzero elements stored row-by-row. */ 354 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 355 356 /* Call MKL sparse BLAS routine to do the MatMult. */ 357 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 358 359 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 360 PetscCall(VecRestoreArrayRead(xx, &x)); 361 PetscCall(VecRestoreArray(yy, &y)); 362 PetscFunctionReturn(0); 363 } 364 #endif 365 366 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 367 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 368 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 369 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 370 const PetscScalar *x; 371 PetscScalar *y; 372 PetscObjectState state; 373 374 PetscFunctionBegin; 375 376 /* If there are no nonzero entries, zero yy and return immediately. */ 377 if (!a->nz) { 378 PetscCall(VecGetArray(yy, &y)); 379 PetscCall(PetscArrayzero(y, A->rmap->n)); 380 PetscCall(VecRestoreArray(yy, &y)); 381 PetscFunctionReturn(0); 382 } 383 384 PetscCall(VecGetArrayRead(xx, &x)); 385 PetscCall(VecGetArray(yy, &y)); 386 387 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 388 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 389 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 390 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 391 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 392 393 /* Call MKL SpMV2 executor routine to do the MatMult. */ 394 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 395 396 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 397 PetscCall(VecRestoreArrayRead(xx, &x)); 398 PetscCall(VecRestoreArray(yy, &y)); 399 PetscFunctionReturn(0); 400 } 401 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 402 403 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 404 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) { 405 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 406 const PetscScalar *x; 407 PetscScalar *y; 408 const MatScalar *aa; 409 PetscInt m = A->rmap->n; 410 PetscInt n = A->cmap->n; 411 PetscScalar alpha = 1.0; 412 PetscScalar beta = 0.0; 413 const PetscInt *aj, *ai; 414 char matdescra[6]; 415 416 /* Variables not in MatMultTranspose_SeqAIJ. */ 417 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 418 419 PetscFunctionBegin; 420 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 421 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 422 PetscCall(VecGetArrayRead(xx, &x)); 423 PetscCall(VecGetArray(yy, &y)); 424 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 425 aa = a->a; /* Nonzero elements stored row-by-row. */ 426 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 427 428 /* Call MKL sparse BLAS routine to do the MatMult. */ 429 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 430 431 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 432 PetscCall(VecRestoreArrayRead(xx, &x)); 433 PetscCall(VecRestoreArray(yy, &y)); 434 PetscFunctionReturn(0); 435 } 436 #endif 437 438 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 439 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 440 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 441 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 442 const PetscScalar *x; 443 PetscScalar *y; 444 PetscObjectState state; 445 446 PetscFunctionBegin; 447 448 /* If there are no nonzero entries, zero yy and return immediately. */ 449 if (!a->nz) { 450 PetscCall(VecGetArray(yy, &y)); 451 PetscCall(PetscArrayzero(y, A->cmap->n)); 452 PetscCall(VecRestoreArray(yy, &y)); 453 PetscFunctionReturn(0); 454 } 455 456 PetscCall(VecGetArrayRead(xx, &x)); 457 PetscCall(VecGetArray(yy, &y)); 458 459 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 460 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 461 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 462 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 463 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 464 465 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 466 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 467 468 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 469 PetscCall(VecRestoreArrayRead(xx, &x)); 470 PetscCall(VecRestoreArray(yy, &y)); 471 PetscFunctionReturn(0); 472 } 473 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 474 475 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 476 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) { 477 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 478 const PetscScalar *x; 479 PetscScalar *y, *z; 480 const MatScalar *aa; 481 PetscInt m = A->rmap->n; 482 PetscInt n = A->cmap->n; 483 const PetscInt *aj, *ai; 484 PetscInt i; 485 486 /* Variables not in MatMultAdd_SeqAIJ. */ 487 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 488 PetscScalar alpha = 1.0; 489 PetscScalar beta; 490 char matdescra[6]; 491 492 PetscFunctionBegin; 493 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 494 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 495 496 PetscCall(VecGetArrayRead(xx, &x)); 497 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 498 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 499 aa = a->a; /* Nonzero elements stored row-by-row. */ 500 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 501 502 /* Call MKL sparse BLAS routine to do the MatMult. */ 503 if (zz == yy) { 504 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 505 beta = 1.0; 506 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 507 } else { 508 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 509 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 510 beta = 0.0; 511 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 512 for (i = 0; i < m; i++) z[i] += y[i]; 513 } 514 515 PetscCall(PetscLogFlops(2.0 * a->nz)); 516 PetscCall(VecRestoreArrayRead(xx, &x)); 517 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 518 PetscFunctionReturn(0); 519 } 520 #endif 521 522 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 523 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 524 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 525 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 526 const PetscScalar *x; 527 PetscScalar *y, *z; 528 PetscInt m = A->rmap->n; 529 PetscInt i; 530 531 /* Variables not in MatMultAdd_SeqAIJ. */ 532 PetscObjectState state; 533 534 PetscFunctionBegin; 535 536 /* If there are no nonzero entries, set zz = yy and return immediately. */ 537 if (!a->nz) { 538 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 539 PetscCall(PetscArraycpy(z, y, m)); 540 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 541 PetscFunctionReturn(0); 542 } 543 544 PetscCall(VecGetArrayRead(xx, &x)); 545 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 546 547 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 548 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 549 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 550 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 551 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 552 553 /* Call MKL sparse BLAS routine to do the MatMult. */ 554 if (zz == yy) { 555 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 556 * with alpha and beta both set to 1.0. */ 557 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 558 } else { 559 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 560 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 561 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 562 for (i = 0; i < m; i++) z[i] += y[i]; 563 } 564 565 PetscCall(PetscLogFlops(2.0 * a->nz)); 566 PetscCall(VecRestoreArrayRead(xx, &x)); 567 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 568 PetscFunctionReturn(0); 569 } 570 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 571 572 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 573 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) { 574 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 575 const PetscScalar *x; 576 PetscScalar *y, *z; 577 const MatScalar *aa; 578 PetscInt m = A->rmap->n; 579 PetscInt n = A->cmap->n; 580 const PetscInt *aj, *ai; 581 PetscInt i; 582 583 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 584 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 585 PetscScalar alpha = 1.0; 586 PetscScalar beta; 587 char matdescra[6]; 588 589 PetscFunctionBegin; 590 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 591 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 592 593 PetscCall(VecGetArrayRead(xx, &x)); 594 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 595 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 596 aa = a->a; /* Nonzero elements stored row-by-row. */ 597 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 598 599 /* Call MKL sparse BLAS routine to do the MatMult. */ 600 if (zz == yy) { 601 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 602 beta = 1.0; 603 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 604 } else { 605 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 606 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 607 beta = 0.0; 608 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 609 for (i = 0; i < n; i++) z[i] += y[i]; 610 } 611 612 PetscCall(PetscLogFlops(2.0 * a->nz)); 613 PetscCall(VecRestoreArrayRead(xx, &x)); 614 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 615 PetscFunctionReturn(0); 616 } 617 #endif 618 619 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 620 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 621 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 622 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 623 const PetscScalar *x; 624 PetscScalar *y, *z; 625 PetscInt n = A->cmap->n; 626 PetscInt i; 627 PetscObjectState state; 628 629 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 630 631 PetscFunctionBegin; 632 633 /* If there are no nonzero entries, set zz = yy and return immediately. */ 634 if (!a->nz) { 635 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 636 PetscCall(PetscArraycpy(z, y, n)); 637 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 638 PetscFunctionReturn(0); 639 } 640 641 PetscCall(VecGetArrayRead(xx, &x)); 642 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 643 644 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 645 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 646 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 647 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 648 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 649 650 /* Call MKL sparse BLAS routine to do the MatMult. */ 651 if (zz == yy) { 652 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 653 * with alpha and beta both set to 1.0. */ 654 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 655 } else { 656 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 657 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 658 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 659 for (i = 0; i < n; i++) z[i] += y[i]; 660 } 661 662 PetscCall(PetscLogFlops(2.0 * a->nz)); 663 PetscCall(VecRestoreArrayRead(xx, &x)); 664 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 665 PetscFunctionReturn(0); 666 } 667 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 668 669 /* -------------------------- MatProduct code -------------------------- */ 670 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 671 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) { 672 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 673 sparse_matrix_t csrA, csrB, csrC; 674 PetscInt nrows, ncols; 675 struct matrix_descr descr_type_gen; 676 PetscObjectState state; 677 678 PetscFunctionBegin; 679 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 680 * not handle sparse matrices with zero rows or columns. */ 681 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 682 else nrows = A->cmap->N; 683 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 684 else ncols = B->rmap->N; 685 686 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 687 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 688 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 689 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 690 csrA = a->csrA; 691 csrB = b->csrA; 692 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 693 694 if (csrA && csrB) { 695 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 696 } else { 697 csrC = PETSC_NULL; 698 } 699 700 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 701 702 PetscFunctionReturn(0); 703 } 704 705 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) { 706 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 707 sparse_matrix_t csrA, csrB, csrC; 708 struct matrix_descr descr_type_gen; 709 PetscObjectState state; 710 711 PetscFunctionBegin; 712 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 713 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 714 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 715 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 716 csrA = a->csrA; 717 csrB = b->csrA; 718 csrC = c->csrA; 719 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 720 721 if (csrA && csrB) { 722 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 723 } else { 724 csrC = PETSC_NULL; 725 } 726 727 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 728 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 729 730 PetscFunctionReturn(0); 731 } 732 733 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 734 PetscFunctionBegin; 735 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 740 PetscFunctionBegin; 741 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 742 PetscFunctionReturn(0); 743 } 744 745 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 746 PetscFunctionBegin; 747 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 752 PetscFunctionBegin; 753 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 758 PetscFunctionBegin; 759 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 764 PetscFunctionBegin; 765 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 766 PetscFunctionReturn(0); 767 } 768 769 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) { 770 Mat_Product *product = C->product; 771 Mat A = product->A, B = product->B; 772 773 PetscFunctionBegin; 774 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 775 PetscFunctionReturn(0); 776 } 777 778 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) { 779 Mat_Product *product = C->product; 780 Mat A = product->A, B = product->B; 781 PetscReal fill = product->fill; 782 783 PetscFunctionBegin; 784 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 785 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) { 790 Mat Ct; 791 Vec zeros; 792 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 793 sparse_matrix_t csrA, csrP, csrC; 794 PetscBool set, flag; 795 struct matrix_descr descr_type_sym; 796 PetscObjectState state; 797 798 PetscFunctionBegin; 799 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 800 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 801 802 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 803 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 804 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 805 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 806 csrA = a->csrA; 807 csrP = p->csrA; 808 csrC = c->csrA; 809 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 810 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 811 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 812 813 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 814 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 815 816 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 817 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 818 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 819 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 820 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 821 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 822 * the full matrix. */ 823 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 824 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 825 PetscCall(MatCreateVecs(C, &zeros, NULL)); 826 PetscCall(VecSetFromOptions(zeros)); 827 PetscCall(VecZeroEntries(zeros)); 828 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 829 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 830 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 831 PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C)); 832 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 833 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 834 PetscCall(VecDestroy(&zeros)); 835 PetscCall(MatDestroy(&Ct)); 836 PetscFunctionReturn(0); 837 } 838 839 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) { 840 Mat_Product *product = C->product; 841 Mat A = product->A, P = product->B; 842 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 843 sparse_matrix_t csrA, csrP, csrC; 844 struct matrix_descr descr_type_sym; 845 PetscObjectState state; 846 847 PetscFunctionBegin; 848 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 849 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 850 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 851 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 852 csrA = a->csrA; 853 csrP = p->csrA; 854 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 855 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 856 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 857 858 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 859 if (csrP && csrA) { 860 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 861 } else { 862 csrC = PETSC_NULL; 863 } 864 865 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 866 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 867 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 868 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 869 * is guaranteed. */ 870 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 871 872 C->ops->productnumeric = MatProductNumeric_PtAP; 873 PetscFunctionReturn(0); 874 } 875 876 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) { 877 PetscFunctionBegin; 878 C->ops->productsymbolic = MatProductSymbolic_AB; 879 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 880 PetscFunctionReturn(0); 881 } 882 883 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) { 884 PetscFunctionBegin; 885 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 886 PetscFunctionReturn(0); 887 } 888 889 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) { 890 PetscFunctionBegin; 891 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 892 C->ops->productsymbolic = MatProductSymbolic_ABt; 893 PetscFunctionReturn(0); 894 } 895 896 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) { 897 Mat_Product *product = C->product; 898 Mat A = product->A; 899 PetscBool set, flag; 900 901 PetscFunctionBegin; 902 if (PetscDefined(USE_COMPLEX)) { 903 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 904 * We do this in several other locations in this file. This works for the time being, but these 905 * routines are considered unsafe and may be removed from the MatProduct code in the future. 906 * TODO: Add proper MATSEQAIJMKL implementations */ 907 C->ops->productsymbolic = NULL; 908 } else { 909 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 910 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 911 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 912 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 913 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 914 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 915 } 916 PetscFunctionReturn(0); 917 } 918 919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) { 920 PetscFunctionBegin; 921 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 922 PetscFunctionReturn(0); 923 } 924 925 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) { 926 PetscFunctionBegin; 927 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 928 PetscFunctionReturn(0); 929 } 930 931 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) { 932 Mat_Product *product = C->product; 933 934 PetscFunctionBegin; 935 switch (product->type) { 936 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); break; 937 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); break; 938 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); break; 939 case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); break; 940 case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); break; 941 case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); break; 942 default: break; 943 } 944 PetscFunctionReturn(0); 945 } 946 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 947 /* ------------------------ End MatProduct code ------------------------ */ 948 949 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 950 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 951 * routine, but can also be used to convert an assembled SeqAIJ matrix 952 * into a SeqAIJMKL one. */ 953 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 954 Mat B = *newmat; 955 Mat_SeqAIJMKL *aijmkl; 956 PetscBool set; 957 PetscBool sametype; 958 959 PetscFunctionBegin; 960 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 961 962 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 963 if (sametype) PetscFunctionReturn(0); 964 965 PetscCall(PetscNewLog(B, &aijmkl)); 966 B->spptr = (void *)aijmkl; 967 968 /* Set function pointers for methods that we inherit from AIJ but override. 969 * We also parse some command line options below, since those determine some of the methods we point to. */ 970 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 971 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 972 B->ops->destroy = MatDestroy_SeqAIJMKL; 973 974 aijmkl->sparse_optimized = PETSC_FALSE; 975 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 976 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 977 #else 978 aijmkl->no_SpMV2 = PETSC_TRUE; 979 #endif 980 aijmkl->eager_inspection = PETSC_FALSE; 981 982 /* Parse command line options. */ 983 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 984 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 985 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 986 PetscOptionsEnd(); 987 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 988 if (!aijmkl->no_SpMV2) { 989 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 990 aijmkl->no_SpMV2 = PETSC_TRUE; 991 } 992 #endif 993 994 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 995 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 996 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 997 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 998 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 999 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1000 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1001 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1002 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1003 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1004 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1005 #if !defined(PETSC_USE_COMPLEX) 1006 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1007 #else 1008 B->ops->ptapnumeric = NULL; 1009 #endif 1010 #endif 1011 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1012 1013 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1014 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1015 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1016 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1017 * versions in which the older interface has not been deprecated, we use the old interface. */ 1018 if (aijmkl->no_SpMV2) { 1019 B->ops->mult = MatMult_SeqAIJMKL; 1020 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1021 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1022 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1023 } 1024 #endif 1025 1026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 1027 1028 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 1029 *newmat = B; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 /*@C 1034 MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 1035 This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 1036 routines from Intel MKL whenever possible. 1037 If the installed version of MKL supports the "SpMV2" sparse 1038 inspector-executor routines, then those are used by default. 1039 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 1040 (for symmetric A) operations are currently supported. 1041 Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 1042 1043 Collective 1044 1045 Input Parameters: 1046 + comm - MPI communicator, set to `PETSC_COMM_SELF` 1047 . m - number of rows 1048 . n - number of columns 1049 . nz - number of nonzeros per row (same for all rows) 1050 - nnz - array containing the number of nonzeros in the various rows 1051 (possibly different for each row) or NULL 1052 1053 Output Parameter: 1054 . A - the matrix 1055 1056 Options Database Keys: 1057 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1058 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1059 1060 Note: 1061 If nnz is given then nz is ignored 1062 1063 Level: intermediate 1064 1065 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1066 @*/ 1067 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 1068 PetscFunctionBegin; 1069 PetscCall(MatCreate(comm, A)); 1070 PetscCall(MatSetSizes(*A, m, n, m, n)); 1071 PetscCall(MatSetType(*A, MATSEQAIJMKL)); 1072 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) { 1077 PetscFunctionBegin; 1078 PetscCall(MatSetType(A, MATSEQAIJ)); 1079 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1080 PetscFunctionReturn(0); 1081 } 1082