1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 Mat B = *newmat; 31 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 32 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 33 #endif 34 35 PetscFunctionBegin; 36 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 37 38 /* Reset the original function pointers. */ 39 B->ops->duplicate = MatDuplicate_SeqAIJ; 40 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 41 B->ops->destroy = MatDestroy_SeqAIJ; 42 B->ops->mult = MatMult_SeqAIJ; 43 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 44 B->ops->multadd = MatMultAdd_SeqAIJ; 45 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 46 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 47 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 48 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 49 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 52 53 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 54 55 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 56 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57 * simply involves destroying the MKL sparse matrix handle and then freeing 58 * the spptr pointer. */ 59 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 60 61 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 63 PetscCall(PetscFree(B->spptr)); 64 65 /* Change the type of B to MATSEQAIJ. */ 66 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 67 68 *newmat = B; 69 PetscFunctionReturn(0); 70 } 71 72 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) { 73 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 74 75 PetscFunctionBegin; 76 77 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 78 if (aijmkl) { 79 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 80 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 81 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 83 PetscCall(PetscFree(A->spptr)); 84 } 85 86 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 87 * to destroy everything that remains. */ 88 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 89 /* Note that I don't call MatSetType(). I believe this is because that 90 * is only to be called when *building* a matrix. I could be wrong, but 91 * that is how things work for the SuperLU matrix class. */ 92 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 93 PetscCall(MatDestroy_SeqAIJ(A)); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 98 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 99 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 100 * handle, creates a new one, and then calls mkl_sparse_optimize(). 101 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 102 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 103 * an unoptimized matrix handle here. */ 104 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) { 105 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 106 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 107 * does nothing. We make it callable anyway in this case because it cuts 108 * down on littering the code with #ifdefs. */ 109 PetscFunctionBegin; 110 PetscFunctionReturn(0); 111 #else 112 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 113 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 114 PetscInt m, n; 115 MatScalar *aa; 116 PetscInt *aj, *ai; 117 118 PetscFunctionBegin; 119 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 120 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 121 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 122 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 123 * mkl_sparse_optimize() later. */ 124 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 125 #endif 126 127 if (aijmkl->sparse_optimized) { 128 /* Matrix has been previously assembled and optimized. Must destroy old 129 * matrix handle before running the optimization step again. */ 130 PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 131 } 132 aijmkl->sparse_optimized = PETSC_FALSE; 133 134 /* Now perform the SpMV2 setup and matrix optimization. */ 135 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 136 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 137 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 138 m = A->rmap->n; 139 n = A->cmap->n; 140 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 141 aa = a->a; /* Nonzero elements stored row-by-row. */ 142 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 143 if (a->nz && aa && !A->structure_only) { 144 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 145 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 146 PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa); 147 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 148 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 149 if (!aijmkl->no_SpMV2) { PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); } 150 aijmkl->sparse_optimized = PETSC_TRUE; 151 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 152 } else { 153 aijmkl->csrA = PETSC_NULL; 154 } 155 156 PetscFunctionReturn(0); 157 #endif 158 } 159 160 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 161 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 162 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) { 163 sparse_index_base_t indexing; 164 PetscInt m, n; 165 PetscInt *aj, *ai, *dummy; 166 MatScalar *aa; 167 Mat_SeqAIJMKL *aijmkl; 168 169 PetscFunctionBegin; 170 if (csrA) { 171 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 172 PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa); 173 PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 174 } else { 175 aj = ai = PETSC_NULL; 176 aa = PETSC_NULL; 177 } 178 179 PetscCall(MatSetType(A, MATSEQAIJ)); 180 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 181 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 182 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 183 * they will be destroyed when the MKL handle is destroyed. 184 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 185 if (csrA) { 186 PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 187 } else { 188 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 189 PetscCall(MatSetUp(A)); 190 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 191 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 192 } 193 194 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 195 * Now turn it into a MATSEQAIJMKL. */ 196 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 197 198 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 199 aijmkl->csrA = csrA; 200 201 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 202 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 203 * and just need to be able to run the MKL optimization step. */ 204 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 205 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 206 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 207 if (csrA) { 208 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 209 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 210 } 211 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 212 PetscFunctionReturn(0); 213 } 214 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 215 216 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 217 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 218 * MatMatMultNumeric(). */ 219 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 220 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) { 221 PetscInt i; 222 PetscInt nrows, ncols; 223 PetscInt nz; 224 PetscInt *ai, *aj, *dummy; 225 PetscScalar *aa; 226 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 227 sparse_index_base_t indexing; 228 229 PetscFunctionBegin; 230 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 231 if (!aijmkl->csrA) PetscFunctionReturn(0); 232 233 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 234 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 235 236 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 237 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 238 for (i = 0; i < nrows; i++) { 239 nz = ai[i + 1] - ai[i]; 240 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 241 } 242 243 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 244 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 245 246 PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state))); 247 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 248 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 249 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 250 aijmkl->sparse_optimized = PETSC_FALSE; 251 PetscFunctionReturn(0); 252 } 253 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 254 255 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 256 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) { 257 PetscInt i, j, k; 258 PetscInt nrows, ncols; 259 PetscInt nz; 260 PetscInt *ai, *aj, *dummy; 261 PetscScalar *aa; 262 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 263 sparse_index_base_t indexing; 264 265 PetscFunctionBegin; 266 PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 267 268 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 269 if (!aijmkl->csrA) { 270 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 271 PetscFunctionReturn(0); 272 } 273 274 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 275 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa); 276 277 k = 0; 278 for (i = 0; i < nrows; i++) { 279 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 280 nz = ai[i + 1] - ai[i]; 281 for (j = 0; j < nz; j++) { 282 if (aa) { 283 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 284 } else { 285 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 286 } 287 k++; 288 } 289 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 290 } 291 PetscFunctionReturn(0); 292 } 293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 294 295 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) { 296 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 297 Mat_SeqAIJMKL *aijmkl_dest; 298 299 PetscFunctionBegin; 300 PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 301 aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 302 PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 303 aijmkl_dest->sparse_optimized = PETSC_FALSE; 304 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 305 PetscFunctionReturn(0); 306 } 307 308 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) { 309 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 310 Mat_SeqAIJMKL *aijmkl; 311 312 PetscFunctionBegin; 313 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 314 315 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 316 * extra information and some different methods, call the AssemblyEnd 317 * routine for a MATSEQAIJ. 318 * I'm not sure if this is the best way to do this, but it avoids 319 * a lot of code duplication. */ 320 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 321 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 322 323 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 324 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 325 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 326 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 327 328 PetscFunctionReturn(0); 329 } 330 331 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 332 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) { 333 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 334 const PetscScalar *x; 335 PetscScalar *y; 336 const MatScalar *aa; 337 PetscInt m = A->rmap->n; 338 PetscInt n = A->cmap->n; 339 PetscScalar alpha = 1.0; 340 PetscScalar beta = 0.0; 341 const PetscInt *aj, *ai; 342 char matdescra[6]; 343 344 /* Variables not in MatMult_SeqAIJ. */ 345 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 346 347 PetscFunctionBegin; 348 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 349 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 350 PetscCall(VecGetArrayRead(xx, &x)); 351 PetscCall(VecGetArray(yy, &y)); 352 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 353 aa = a->a; /* Nonzero elements stored row-by-row. */ 354 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 355 356 /* Call MKL sparse BLAS routine to do the MatMult. */ 357 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 358 359 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 360 PetscCall(VecRestoreArrayRead(xx, &x)); 361 PetscCall(VecRestoreArray(yy, &y)); 362 PetscFunctionReturn(0); 363 } 364 #endif 365 366 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 367 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 368 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 369 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 370 const PetscScalar *x; 371 PetscScalar *y; 372 PetscObjectState state; 373 374 PetscFunctionBegin; 375 376 /* If there are no nonzero entries, zero yy and return immediately. */ 377 if (!a->nz) { 378 PetscCall(VecGetArray(yy, &y)); 379 PetscCall(PetscArrayzero(y, A->rmap->n)); 380 PetscCall(VecRestoreArray(yy, &y)); 381 PetscFunctionReturn(0); 382 } 383 384 PetscCall(VecGetArrayRead(xx, &x)); 385 PetscCall(VecGetArray(yy, &y)); 386 387 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 388 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 389 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 390 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 391 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 392 393 /* Call MKL SpMV2 executor routine to do the MatMult. */ 394 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 395 396 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 397 PetscCall(VecRestoreArrayRead(xx, &x)); 398 PetscCall(VecRestoreArray(yy, &y)); 399 PetscFunctionReturn(0); 400 } 401 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 402 403 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 404 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) { 405 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 406 const PetscScalar *x; 407 PetscScalar *y; 408 const MatScalar *aa; 409 PetscInt m = A->rmap->n; 410 PetscInt n = A->cmap->n; 411 PetscScalar alpha = 1.0; 412 PetscScalar beta = 0.0; 413 const PetscInt *aj, *ai; 414 char matdescra[6]; 415 416 /* Variables not in MatMultTranspose_SeqAIJ. */ 417 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 418 419 PetscFunctionBegin; 420 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 421 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 422 PetscCall(VecGetArrayRead(xx, &x)); 423 PetscCall(VecGetArray(yy, &y)); 424 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 425 aa = a->a; /* Nonzero elements stored row-by-row. */ 426 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 427 428 /* Call MKL sparse BLAS routine to do the MatMult. */ 429 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 430 431 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 432 PetscCall(VecRestoreArrayRead(xx, &x)); 433 PetscCall(VecRestoreArray(yy, &y)); 434 PetscFunctionReturn(0); 435 } 436 #endif 437 438 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 439 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) { 440 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 441 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 442 const PetscScalar *x; 443 PetscScalar *y; 444 PetscObjectState state; 445 446 PetscFunctionBegin; 447 448 /* If there are no nonzero entries, zero yy and return immediately. */ 449 if (!a->nz) { 450 PetscCall(VecGetArray(yy, &y)); 451 PetscCall(PetscArrayzero(y, A->cmap->n)); 452 PetscCall(VecRestoreArray(yy, &y)); 453 PetscFunctionReturn(0); 454 } 455 456 PetscCall(VecGetArrayRead(xx, &x)); 457 PetscCall(VecGetArray(yy, &y)); 458 459 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 460 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 461 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 462 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 463 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 464 465 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 466 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 467 468 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 469 PetscCall(VecRestoreArrayRead(xx, &x)); 470 PetscCall(VecRestoreArray(yy, &y)); 471 PetscFunctionReturn(0); 472 } 473 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 474 475 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 476 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) { 477 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 478 const PetscScalar *x; 479 PetscScalar *y, *z; 480 const MatScalar *aa; 481 PetscInt m = A->rmap->n; 482 PetscInt n = A->cmap->n; 483 const PetscInt *aj, *ai; 484 PetscInt i; 485 486 /* Variables not in MatMultAdd_SeqAIJ. */ 487 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 488 PetscScalar alpha = 1.0; 489 PetscScalar beta; 490 char matdescra[6]; 491 492 PetscFunctionBegin; 493 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 494 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 495 496 PetscCall(VecGetArrayRead(xx, &x)); 497 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 498 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 499 aa = a->a; /* Nonzero elements stored row-by-row. */ 500 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 501 502 /* Call MKL sparse BLAS routine to do the MatMult. */ 503 if (zz == yy) { 504 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 505 beta = 1.0; 506 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 507 } else { 508 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 509 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 510 beta = 0.0; 511 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 512 for (i = 0; i < m; i++) { z[i] += y[i]; } 513 } 514 515 PetscCall(PetscLogFlops(2.0 * a->nz)); 516 PetscCall(VecRestoreArrayRead(xx, &x)); 517 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 518 PetscFunctionReturn(0); 519 } 520 #endif 521 522 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 523 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 524 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 525 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 526 const PetscScalar *x; 527 PetscScalar *y, *z; 528 PetscInt m = A->rmap->n; 529 PetscInt i; 530 531 /* Variables not in MatMultAdd_SeqAIJ. */ 532 PetscObjectState state; 533 534 PetscFunctionBegin; 535 536 /* If there are no nonzero entries, set zz = yy and return immediately. */ 537 if (!a->nz) { 538 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 539 PetscCall(PetscArraycpy(z, y, m)); 540 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 541 PetscFunctionReturn(0); 542 } 543 544 PetscCall(VecGetArrayRead(xx, &x)); 545 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 546 547 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 548 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 549 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 550 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 551 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 552 553 /* Call MKL sparse BLAS routine to do the MatMult. */ 554 if (zz == yy) { 555 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 556 * with alpha and beta both set to 1.0. */ 557 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 558 } else { 559 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 560 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 561 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 562 for (i = 0; i < m; i++) z[i] += y[i]; 563 } 564 565 PetscCall(PetscLogFlops(2.0 * a->nz)); 566 PetscCall(VecRestoreArrayRead(xx, &x)); 567 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 568 PetscFunctionReturn(0); 569 } 570 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 571 572 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 573 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) { 574 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 575 const PetscScalar *x; 576 PetscScalar *y, *z; 577 const MatScalar *aa; 578 PetscInt m = A->rmap->n; 579 PetscInt n = A->cmap->n; 580 const PetscInt *aj, *ai; 581 PetscInt i; 582 583 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 584 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 585 PetscScalar alpha = 1.0; 586 PetscScalar beta; 587 char matdescra[6]; 588 589 PetscFunctionBegin; 590 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 591 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 592 593 PetscCall(VecGetArrayRead(xx, &x)); 594 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 595 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 596 aa = a->a; /* Nonzero elements stored row-by-row. */ 597 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 598 599 /* Call MKL sparse BLAS routine to do the MatMult. */ 600 if (zz == yy) { 601 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 602 beta = 1.0; 603 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 604 } else { 605 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 606 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 607 beta = 0.0; 608 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 609 for (i = 0; i < n; i++) { z[i] += y[i]; } 610 } 611 612 PetscCall(PetscLogFlops(2.0 * a->nz)); 613 PetscCall(VecRestoreArrayRead(xx, &x)); 614 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 615 PetscFunctionReturn(0); 616 } 617 #endif 618 619 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 620 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) { 621 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 622 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 623 const PetscScalar *x; 624 PetscScalar *y, *z; 625 PetscInt n = A->cmap->n; 626 PetscInt i; 627 PetscObjectState state; 628 629 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 630 631 PetscFunctionBegin; 632 633 /* If there are no nonzero entries, set zz = yy and return immediately. */ 634 if (!a->nz) { 635 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 636 PetscCall(PetscArraycpy(z, y, n)); 637 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 638 PetscFunctionReturn(0); 639 } 640 641 PetscCall(VecGetArrayRead(xx, &x)); 642 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 643 644 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 645 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 646 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 647 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 648 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 649 650 /* Call MKL sparse BLAS routine to do the MatMult. */ 651 if (zz == yy) { 652 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 653 * with alpha and beta both set to 1.0. */ 654 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 655 } else { 656 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 657 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 658 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 659 for (i = 0; i < n; i++) z[i] += y[i]; 660 } 661 662 PetscCall(PetscLogFlops(2.0 * a->nz)); 663 PetscCall(VecRestoreArrayRead(xx, &x)); 664 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 665 PetscFunctionReturn(0); 666 } 667 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 668 669 /* -------------------------- MatProduct code -------------------------- */ 670 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 671 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) { 672 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 673 sparse_matrix_t csrA, csrB, csrC; 674 PetscInt nrows, ncols; 675 struct matrix_descr descr_type_gen; 676 PetscObjectState state; 677 678 PetscFunctionBegin; 679 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 680 * not handle sparse matrices with zero rows or columns. */ 681 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 682 else nrows = A->cmap->N; 683 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 684 else ncols = B->rmap->N; 685 686 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 687 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 688 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 689 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 690 csrA = a->csrA; 691 csrB = b->csrA; 692 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 693 694 if (csrA && csrB) { 695 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 696 } else { 697 csrC = PETSC_NULL; 698 } 699 700 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 701 702 PetscFunctionReturn(0); 703 } 704 705 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) { 706 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 707 sparse_matrix_t csrA, csrB, csrC; 708 struct matrix_descr descr_type_gen; 709 PetscObjectState state; 710 711 PetscFunctionBegin; 712 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 713 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 714 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 715 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 716 csrA = a->csrA; 717 csrB = b->csrA; 718 csrC = c->csrA; 719 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 720 721 if (csrA && csrB) { 722 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 723 } else { 724 csrC = PETSC_NULL; 725 } 726 727 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 728 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 729 730 PetscFunctionReturn(0); 731 } 732 733 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 734 PetscFunctionBegin; 735 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 740 PetscFunctionBegin; 741 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 742 PetscFunctionReturn(0); 743 } 744 745 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 746 PetscFunctionBegin; 747 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 752 PetscFunctionBegin; 753 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) { 758 PetscFunctionBegin; 759 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) { 764 PetscFunctionBegin; 765 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 766 PetscFunctionReturn(0); 767 } 768 769 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) { 770 Mat_Product *product = C->product; 771 Mat A = product->A, B = product->B; 772 773 PetscFunctionBegin; 774 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 775 PetscFunctionReturn(0); 776 } 777 778 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) { 779 Mat_Product *product = C->product; 780 Mat A = product->A, B = product->B; 781 PetscReal fill = product->fill; 782 783 PetscFunctionBegin; 784 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 785 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) { 790 Mat Ct; 791 Vec zeros; 792 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 793 sparse_matrix_t csrA, csrP, csrC; 794 PetscBool set, flag; 795 struct matrix_descr descr_type_sym; 796 PetscObjectState state; 797 798 PetscFunctionBegin; 799 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 800 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 801 802 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 803 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 804 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 805 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 806 csrA = a->csrA; 807 csrP = p->csrA; 808 csrC = c->csrA; 809 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 810 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 811 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 812 813 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 814 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 815 816 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 817 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 818 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 819 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 820 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 821 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 822 * the full matrix. */ 823 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 824 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 825 PetscCall(MatCreateVecs(C, &zeros, NULL)); 826 PetscCall(VecSetFromOptions(zeros)); 827 PetscCall(VecZeroEntries(zeros)); 828 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 829 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 830 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 831 PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C)); 832 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 833 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 834 PetscCall(VecDestroy(&zeros)); 835 PetscCall(MatDestroy(&Ct)); 836 PetscFunctionReturn(0); 837 } 838 839 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) { 840 Mat_Product *product = C->product; 841 Mat A = product->A, P = product->B; 842 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 843 sparse_matrix_t csrA, csrP, csrC; 844 struct matrix_descr descr_type_sym; 845 PetscObjectState state; 846 847 PetscFunctionBegin; 848 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 849 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 850 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 851 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 852 csrA = a->csrA; 853 csrP = p->csrA; 854 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 855 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 856 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 857 858 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 859 if (csrP && csrA) { 860 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 861 } else { 862 csrC = PETSC_NULL; 863 } 864 865 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 866 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 867 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 868 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 869 * is guaranteed. */ 870 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 871 872 C->ops->productnumeric = MatProductNumeric_PtAP; 873 PetscFunctionReturn(0); 874 } 875 876 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) { 877 PetscFunctionBegin; 878 C->ops->productsymbolic = MatProductSymbolic_AB; 879 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 880 PetscFunctionReturn(0); 881 } 882 883 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) { 884 PetscFunctionBegin; 885 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 886 PetscFunctionReturn(0); 887 } 888 889 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) { 890 PetscFunctionBegin; 891 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 892 C->ops->productsymbolic = MatProductSymbolic_ABt; 893 PetscFunctionReturn(0); 894 } 895 896 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) { 897 Mat_Product *product = C->product; 898 Mat A = product->A; 899 PetscBool set, flag; 900 901 PetscFunctionBegin; 902 if (PetscDefined(USE_COMPLEX)) { 903 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 904 * We do this in several other locations in this file. This works for the time being, but these 905 * routines are considered unsafe and may be removed from the MatProduct code in the future. 906 * TODO: Add proper MATSEQAIJMKL implementations */ 907 C->ops->productsymbolic = NULL; 908 } else { 909 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 910 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 911 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 912 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 913 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 914 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 915 } 916 PetscFunctionReturn(0); 917 } 918 919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) { 920 PetscFunctionBegin; 921 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 922 PetscFunctionReturn(0); 923 } 924 925 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) { 926 PetscFunctionBegin; 927 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 928 PetscFunctionReturn(0); 929 } 930 931 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) { 932 Mat_Product *product = C->product; 933 934 PetscFunctionBegin; 935 switch (product->type) { 936 case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); break; 937 case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); break; 938 case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); break; 939 case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); break; 940 case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); break; 941 case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); break; 942 default: break; 943 } 944 PetscFunctionReturn(0); 945 } 946 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 947 /* ------------------------ End MatProduct code ------------------------ */ 948 949 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 950 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 951 * routine, but can also be used to convert an assembled SeqAIJ matrix 952 * into a SeqAIJMKL one. */ 953 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 954 Mat B = *newmat; 955 Mat_SeqAIJMKL *aijmkl; 956 PetscBool set; 957 PetscBool sametype; 958 959 PetscFunctionBegin; 960 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 961 962 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 963 if (sametype) PetscFunctionReturn(0); 964 965 PetscCall(PetscNewLog(B, &aijmkl)); 966 B->spptr = (void *)aijmkl; 967 968 /* Set function pointers for methods that we inherit from AIJ but override. 969 * We also parse some command line options below, since those determine some of the methods we point to. */ 970 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 971 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 972 B->ops->destroy = MatDestroy_SeqAIJMKL; 973 974 aijmkl->sparse_optimized = PETSC_FALSE; 975 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 976 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 977 #else 978 aijmkl->no_SpMV2 = PETSC_TRUE; 979 #endif 980 aijmkl->eager_inspection = PETSC_FALSE; 981 982 /* Parse command line options. */ 983 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 984 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 985 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 986 PetscOptionsEnd(); 987 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 988 if (!aijmkl->no_SpMV2) { 989 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 990 aijmkl->no_SpMV2 = PETSC_TRUE; 991 } 992 #endif 993 994 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 995 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 996 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 997 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 998 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 999 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1000 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1001 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1002 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1003 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1004 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1005 #if !defined(PETSC_USE_COMPLEX) 1006 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1007 #else 1008 B->ops->ptapnumeric = NULL; 1009 #endif 1010 #endif 1011 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1012 1013 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1014 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1015 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1016 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1017 * versions in which the older interface has not been deprecated, we use the old interface. */ 1018 if (aijmkl->no_SpMV2) { 1019 B->ops->mult = MatMult_SeqAIJMKL; 1020 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1021 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1022 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1023 } 1024 #endif 1025 1026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 1027 1028 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 1029 *newmat = B; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 /*@C 1034 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1035 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1036 routines from Intel MKL whenever possible. 1037 If the installed version of MKL supports the "SpMV2" sparse 1038 inspector-executor routines, then those are used by default. 1039 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1040 symmetric A) operations are currently supported. 1041 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1042 1043 Collective 1044 1045 Input Parameters: 1046 + comm - MPI communicator, set to PETSC_COMM_SELF 1047 . m - number of rows 1048 . n - number of columns 1049 . nz - number of nonzeros per row (same for all rows) 1050 - nnz - array containing the number of nonzeros in the various rows 1051 (possibly different for each row) or NULL 1052 1053 Output Parameter: 1054 . A - the matrix 1055 1056 Options Database Keys: 1057 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1058 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1059 1060 Notes: 1061 If nnz is given then nz is ignored 1062 1063 Level: intermediate 1064 1065 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1066 @*/ 1067 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 1068 PetscFunctionBegin; 1069 PetscCall(MatCreate(comm, A)); 1070 PetscCall(MatSetSizes(*A, m, n, m, n)); 1071 PetscCall(MatSetType(*A, MATSEQAIJMKL)); 1072 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) { 1077 PetscFunctionBegin; 1078 PetscCall(MatSetType(A, MATSEQAIJ)); 1079 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1080 PetscFunctionReturn(0); 1081 } 1082