1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 Mat B = *newmat; 32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 33 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 34 #endif 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 53 54 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL)); 55 56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58 * simply involves destroying the MKL sparse matrix handle and then freeing 59 * the spptr pointer. */ 60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61 62 if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 64 PetscCall(PetscFree(B->spptr)); 65 66 /* Change the type of B to MATSEQAIJ. */ 67 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 68 69 *newmat = B; 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 76 77 PetscFunctionBegin; 78 79 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80 if (aijmkl) { 81 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 83 if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85 PetscCall(PetscFree(A->spptr)); 86 } 87 88 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 89 * to destroy everything that remains. */ 90 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 91 /* Note that I don't call MatSetType(). I believe this is because that 92 * is only to be called when *building* a matrix. I could be wrong, but 93 * that is how things work for the SuperLU matrix class. */ 94 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaijmkl_seqaij_C",NULL)); 95 PetscCall(MatDestroy_SeqAIJ(A)); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 100 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 101 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 102 * handle, creates a new one, and then calls mkl_sparse_optimize(). 103 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 104 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 105 * an unoptimized matrix handle here. */ 106 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 107 { 108 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 109 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 110 * does nothing. We make it callable anyway in this case because it cuts 111 * down on littering the code with #ifdefs. */ 112 PetscFunctionBegin; 113 PetscFunctionReturn(0); 114 #else 115 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 116 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 117 PetscInt m,n; 118 MatScalar *aa; 119 PetscInt *aj,*ai; 120 121 PetscFunctionBegin; 122 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 123 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 124 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 125 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 126 * mkl_sparse_optimize() later. */ 127 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 128 #endif 129 130 if (aijmkl->sparse_optimized) { 131 /* Matrix has been previously assembled and optimized. Must destroy old 132 * matrix handle before running the optimization step again. */ 133 PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 134 } 135 aijmkl->sparse_optimized = PETSC_FALSE; 136 137 /* Now perform the SpMV2 setup and matrix optimization. */ 138 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 139 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 140 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 141 m = A->rmap->n; 142 n = A->cmap->n; 143 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 144 aa = a->a; /* Nonzero elements stored row-by-row. */ 145 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 146 if (a->nz && aa && !A->structure_only) { 147 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 148 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 149 PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 150 PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 151 PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 152 if (!aijmkl->no_SpMV2) { 153 PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA); 154 } 155 aijmkl->sparse_optimized = PETSC_TRUE; 156 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 157 } else { 158 aijmkl->csrA = PETSC_NULL; 159 } 160 161 PetscFunctionReturn(0); 162 #endif 163 } 164 165 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 166 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 167 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 168 { 169 sparse_index_base_t indexing; 170 PetscInt m,n; 171 PetscInt *aj,*ai,*dummy; 172 MatScalar *aa; 173 Mat_SeqAIJMKL *aijmkl; 174 175 PetscFunctionBegin; 176 if (csrA) { 177 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 178 PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 179 PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 180 } else { 181 aj = ai = PETSC_NULL; 182 aa = PETSC_NULL; 183 } 184 185 PetscCall(MatSetType(A,MATSEQAIJ)); 186 PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols)); 187 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 188 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 189 * they will be destroyed when the MKL handle is destroyed. 190 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 191 if (csrA) { 192 PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL)); 193 } else { 194 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 195 PetscCall(MatSetUp(A)); 196 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 197 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 198 } 199 200 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 201 * Now turn it into a MATSEQAIJMKL. */ 202 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 203 204 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 205 aijmkl->csrA = csrA; 206 207 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 208 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 209 * and just need to be able to run the MKL optimization step. */ 210 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 211 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 212 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 213 if (csrA) { 214 PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 215 PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 216 } 217 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 218 PetscFunctionReturn(0); 219 } 220 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 221 222 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 223 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 224 * MatMatMultNumeric(). */ 225 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 226 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 227 { 228 PetscInt i; 229 PetscInt nrows,ncols; 230 PetscInt nz; 231 PetscInt *ai,*aj,*dummy; 232 PetscScalar *aa; 233 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 234 sparse_index_base_t indexing; 235 236 PetscFunctionBegin; 237 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 238 if (!aijmkl->csrA) PetscFunctionReturn(0); 239 240 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 241 PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 242 243 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 244 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 245 for (i=0; i<nrows; i++) { 246 nz = ai[i+1] - ai[i]; 247 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES)); 248 } 249 250 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 251 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 252 253 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 254 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 255 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 256 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 257 aijmkl->sparse_optimized = PETSC_FALSE; 258 PetscFunctionReturn(0); 259 } 260 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 261 262 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 263 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 264 { 265 PetscInt i,j,k; 266 PetscInt nrows,ncols; 267 PetscInt nz; 268 PetscInt *ai,*aj,*dummy; 269 PetscScalar *aa; 270 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 271 sparse_index_base_t indexing; 272 273 PetscFunctionBegin; 274 PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 275 276 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 277 if (!aijmkl->csrA) { 278 PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n")); 279 PetscFunctionReturn(0); 280 } 281 282 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 283 PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 284 285 k = 0; 286 for (i=0; i<nrows; i++) { 287 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i)); 288 nz = ai[i+1] - ai[i]; 289 for (j=0; j<nz; j++) { 290 if (aa) { 291 PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g) ",aj[k],PetscRealPart(aa[k]))); 292 } else { 293 PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k])); 294 } 295 k++; 296 } 297 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 298 } 299 PetscFunctionReturn(0); 300 } 301 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 302 303 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 304 { 305 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 306 Mat_SeqAIJMKL *aijmkl_dest; 307 308 PetscFunctionBegin; 309 PetscCall(MatDuplicate_SeqAIJ(A,op,M)); 310 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 311 PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1)); 312 aijmkl_dest->sparse_optimized = PETSC_FALSE; 313 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 314 PetscFunctionReturn(0); 315 } 316 317 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 318 { 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 320 Mat_SeqAIJMKL *aijmkl; 321 322 PetscFunctionBegin; 323 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 324 325 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 326 * extra information and some different methods, call the AssemblyEnd 327 * routine for a MATSEQAIJ. 328 * I'm not sure if this is the best way to do this, but it avoids 329 * a lot of code duplication. */ 330 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 331 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 332 333 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 334 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 335 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 336 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 337 338 PetscFunctionReturn(0); 339 } 340 341 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 342 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 345 const PetscScalar *x; 346 PetscScalar *y; 347 const MatScalar *aa; 348 PetscInt m = A->rmap->n; 349 PetscInt n = A->cmap->n; 350 PetscScalar alpha = 1.0; 351 PetscScalar beta = 0.0; 352 const PetscInt *aj,*ai; 353 char matdescra[6]; 354 355 /* Variables not in MatMult_SeqAIJ. */ 356 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 357 358 PetscFunctionBegin; 359 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 360 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 361 PetscCall(VecGetArrayRead(xx,&x)); 362 PetscCall(VecGetArray(yy,&y)); 363 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 364 aa = a->a; /* Nonzero elements stored row-by-row. */ 365 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 366 367 /* Call MKL sparse BLAS routine to do the MatMult. */ 368 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 369 370 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 371 PetscCall(VecRestoreArrayRead(xx,&x)); 372 PetscCall(VecRestoreArray(yy,&y)); 373 PetscFunctionReturn(0); 374 } 375 #endif 376 377 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 378 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 379 { 380 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 381 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 382 const PetscScalar *x; 383 PetscScalar *y; 384 PetscObjectState state; 385 386 PetscFunctionBegin; 387 388 /* If there are no nonzero entries, zero yy and return immediately. */ 389 if (!a->nz) { 390 PetscCall(VecGetArray(yy,&y)); 391 PetscCall(PetscArrayzero(y,A->rmap->n)); 392 PetscCall(VecRestoreArray(yy,&y)); 393 PetscFunctionReturn(0); 394 } 395 396 PetscCall(VecGetArrayRead(xx,&x)); 397 PetscCall(VecGetArray(yy,&y)); 398 399 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 400 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 401 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 402 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 403 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 404 405 /* Call MKL SpMV2 executor routine to do the MatMult. */ 406 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 407 408 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 409 PetscCall(VecRestoreArrayRead(xx,&x)); 410 PetscCall(VecRestoreArray(yy,&y)); 411 PetscFunctionReturn(0); 412 } 413 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 414 415 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 416 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 417 { 418 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 419 const PetscScalar *x; 420 PetscScalar *y; 421 const MatScalar *aa; 422 PetscInt m = A->rmap->n; 423 PetscInt n = A->cmap->n; 424 PetscScalar alpha = 1.0; 425 PetscScalar beta = 0.0; 426 const PetscInt *aj,*ai; 427 char matdescra[6]; 428 429 /* Variables not in MatMultTranspose_SeqAIJ. */ 430 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 431 432 PetscFunctionBegin; 433 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 434 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 435 PetscCall(VecGetArrayRead(xx,&x)); 436 PetscCall(VecGetArray(yy,&y)); 437 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 438 aa = a->a; /* Nonzero elements stored row-by-row. */ 439 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 440 441 /* Call MKL sparse BLAS routine to do the MatMult. */ 442 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 443 444 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 445 PetscCall(VecRestoreArrayRead(xx,&x)); 446 PetscCall(VecRestoreArray(yy,&y)); 447 PetscFunctionReturn(0); 448 } 449 #endif 450 451 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 452 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 453 { 454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 455 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 456 const PetscScalar *x; 457 PetscScalar *y; 458 PetscObjectState state; 459 460 PetscFunctionBegin; 461 462 /* If there are no nonzero entries, zero yy and return immediately. */ 463 if (!a->nz) { 464 PetscCall(VecGetArray(yy,&y)); 465 PetscCall(PetscArrayzero(y,A->cmap->n)); 466 PetscCall(VecRestoreArray(yy,&y)); 467 PetscFunctionReturn(0); 468 } 469 470 PetscCall(VecGetArrayRead(xx,&x)); 471 PetscCall(VecGetArray(yy,&y)); 472 473 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 474 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 475 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 476 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 477 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 478 479 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 480 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 481 482 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 483 PetscCall(VecRestoreArrayRead(xx,&x)); 484 PetscCall(VecRestoreArray(yy,&y)); 485 PetscFunctionReturn(0); 486 } 487 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 488 489 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 490 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 491 { 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 const PetscScalar *x; 494 PetscScalar *y,*z; 495 const MatScalar *aa; 496 PetscInt m = A->rmap->n; 497 PetscInt n = A->cmap->n; 498 const PetscInt *aj,*ai; 499 PetscInt i; 500 501 /* Variables not in MatMultAdd_SeqAIJ. */ 502 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 503 PetscScalar alpha = 1.0; 504 PetscScalar beta; 505 char matdescra[6]; 506 507 PetscFunctionBegin; 508 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 509 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 510 511 PetscCall(VecGetArrayRead(xx,&x)); 512 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 513 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 514 aa = a->a; /* Nonzero elements stored row-by-row. */ 515 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 516 517 /* Call MKL sparse BLAS routine to do the MatMult. */ 518 if (zz == yy) { 519 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 520 beta = 1.0; 521 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 522 } else { 523 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 524 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 525 beta = 0.0; 526 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 527 for (i=0; i<m; i++) { 528 z[i] += y[i]; 529 } 530 } 531 532 PetscCall(PetscLogFlops(2.0*a->nz)); 533 PetscCall(VecRestoreArrayRead(xx,&x)); 534 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 535 PetscFunctionReturn(0); 536 } 537 #endif 538 539 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 540 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 541 { 542 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 543 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 544 const PetscScalar *x; 545 PetscScalar *y,*z; 546 PetscInt m = A->rmap->n; 547 PetscInt i; 548 549 /* Variables not in MatMultAdd_SeqAIJ. */ 550 PetscObjectState state; 551 552 PetscFunctionBegin; 553 554 /* If there are no nonzero entries, set zz = yy and return immediately. */ 555 if (!a->nz) { 556 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 557 PetscCall(PetscArraycpy(z,y,m)); 558 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 559 PetscFunctionReturn(0); 560 } 561 562 PetscCall(VecGetArrayRead(xx,&x)); 563 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 564 565 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 566 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 567 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 568 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 569 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 570 571 /* Call MKL sparse BLAS routine to do the MatMult. */ 572 if (zz == yy) { 573 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 574 * with alpha and beta both set to 1.0. */ 575 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 576 } else { 577 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 578 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 579 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 580 for (i=0; i<m; i++) z[i] += y[i]; 581 } 582 583 PetscCall(PetscLogFlops(2.0*a->nz)); 584 PetscCall(VecRestoreArrayRead(xx,&x)); 585 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 586 PetscFunctionReturn(0); 587 } 588 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 589 590 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 591 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 594 const PetscScalar *x; 595 PetscScalar *y,*z; 596 const MatScalar *aa; 597 PetscInt m = A->rmap->n; 598 PetscInt n = A->cmap->n; 599 const PetscInt *aj,*ai; 600 PetscInt i; 601 602 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 603 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 604 PetscScalar alpha = 1.0; 605 PetscScalar beta; 606 char matdescra[6]; 607 608 PetscFunctionBegin; 609 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 610 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 611 612 PetscCall(VecGetArrayRead(xx,&x)); 613 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 614 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 615 aa = a->a; /* Nonzero elements stored row-by-row. */ 616 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 617 618 /* Call MKL sparse BLAS routine to do the MatMult. */ 619 if (zz == yy) { 620 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 621 beta = 1.0; 622 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 623 } else { 624 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 625 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 626 beta = 0.0; 627 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 628 for (i=0; i<n; i++) { 629 z[i] += y[i]; 630 } 631 } 632 633 PetscCall(PetscLogFlops(2.0*a->nz)); 634 PetscCall(VecRestoreArrayRead(xx,&x)); 635 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 636 PetscFunctionReturn(0); 637 } 638 #endif 639 640 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 641 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 642 { 643 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 644 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 645 const PetscScalar *x; 646 PetscScalar *y,*z; 647 PetscInt n = A->cmap->n; 648 PetscInt i; 649 PetscObjectState state; 650 651 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 652 653 PetscFunctionBegin; 654 655 /* If there are no nonzero entries, set zz = yy and return immediately. */ 656 if (!a->nz) { 657 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 658 PetscCall(PetscArraycpy(z,y,n)); 659 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 660 PetscFunctionReturn(0); 661 } 662 663 PetscCall(VecGetArrayRead(xx,&x)); 664 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 665 666 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 667 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 668 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 669 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 670 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 671 672 /* Call MKL sparse BLAS routine to do the MatMult. */ 673 if (zz == yy) { 674 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 675 * with alpha and beta both set to 1.0. */ 676 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 677 } else { 678 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 679 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 680 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 681 for (i=0; i<n; i++) z[i] += y[i]; 682 } 683 684 PetscCall(PetscLogFlops(2.0*a->nz)); 685 PetscCall(VecRestoreArrayRead(xx,&x)); 686 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 687 PetscFunctionReturn(0); 688 } 689 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 690 691 /* -------------------------- MatProduct code -------------------------- */ 692 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 693 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 694 { 695 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 696 sparse_matrix_t csrA,csrB,csrC; 697 PetscInt nrows,ncols; 698 struct matrix_descr descr_type_gen; 699 PetscObjectState state; 700 701 PetscFunctionBegin; 702 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 703 * not handle sparse matrices with zero rows or columns. */ 704 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 705 else nrows = A->cmap->N; 706 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 707 else ncols = B->rmap->N; 708 709 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 710 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 711 PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 712 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 713 csrA = a->csrA; 714 csrB = b->csrA; 715 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 716 717 if (csrA && csrB) { 718 PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 719 } else { 720 csrC = PETSC_NULL; 721 } 722 723 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C)); 724 725 PetscFunctionReturn(0); 726 } 727 728 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 729 { 730 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 731 sparse_matrix_t csrA, csrB, csrC; 732 struct matrix_descr descr_type_gen; 733 PetscObjectState state; 734 735 PetscFunctionBegin; 736 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 737 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 738 PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 739 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 740 csrA = a->csrA; 741 csrB = b->csrA; 742 csrC = c->csrA; 743 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 744 745 if (csrA && csrB) { 746 PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 747 } else { 748 csrC = PETSC_NULL; 749 } 750 751 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 752 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 753 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 758 { 759 PetscFunctionBegin; 760 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 761 PetscFunctionReturn(0); 762 } 763 764 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 765 { 766 PetscFunctionBegin; 767 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 768 PetscFunctionReturn(0); 769 } 770 771 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 772 { 773 PetscFunctionBegin; 774 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 775 PetscFunctionReturn(0); 776 } 777 778 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 779 { 780 PetscFunctionBegin; 781 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 782 PetscFunctionReturn(0); 783 } 784 785 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 786 { 787 PetscFunctionBegin; 788 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 789 PetscFunctionReturn(0); 790 } 791 792 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 793 { 794 PetscFunctionBegin; 795 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 796 PetscFunctionReturn(0); 797 } 798 799 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 800 { 801 Mat_Product *product = C->product; 802 Mat A = product->A,B = product->B; 803 804 PetscFunctionBegin; 805 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C)); 806 PetscFunctionReturn(0); 807 } 808 809 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 810 { 811 Mat_Product *product = C->product; 812 Mat A = product->A,B = product->B; 813 PetscReal fill = product->fill; 814 815 PetscFunctionBegin; 816 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C)); 817 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 818 PetscFunctionReturn(0); 819 } 820 821 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 822 { 823 Mat Ct; 824 Vec zeros; 825 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 826 sparse_matrix_t csrA, csrP, csrC; 827 PetscBool set, flag; 828 struct matrix_descr descr_type_sym; 829 PetscObjectState state; 830 831 PetscFunctionBegin; 832 PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 833 PetscCheck(set && flag,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 834 835 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 836 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 837 PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 838 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 839 csrA = a->csrA; 840 csrP = p->csrA; 841 csrC = c->csrA; 842 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 843 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 844 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 845 846 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 847 PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 848 849 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 850 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 851 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 852 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 853 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 854 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 855 * the full matrix. */ 856 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 857 PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct)); 858 PetscCall(MatCreateVecs(C,&zeros,NULL)); 859 PetscCall(VecSetFromOptions(zeros)); 860 PetscCall(VecZeroEntries(zeros)); 861 PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES)); 862 PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN)); 863 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 864 PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C)); 865 PetscCall(MatProductSetType(C,MATPRODUCT_PtAP)); 866 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 867 PetscCall(VecDestroy(&zeros)); 868 PetscCall(MatDestroy(&Ct)); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 873 { 874 Mat_Product *product = C->product; 875 Mat A = product->A,P = product->B; 876 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 877 sparse_matrix_t csrA,csrP,csrC; 878 struct matrix_descr descr_type_sym; 879 PetscObjectState state; 880 881 PetscFunctionBegin; 882 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 883 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 884 PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 885 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 886 csrA = a->csrA; 887 csrP = p->csrA; 888 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 889 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 890 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 891 892 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 893 if (csrP && csrA) { 894 PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 895 } else { 896 csrC = PETSC_NULL; 897 } 898 899 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 900 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 901 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 902 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 903 * is guaranteed. */ 904 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C)); 905 906 C->ops->productnumeric = MatProductNumeric_PtAP; 907 PetscFunctionReturn(0); 908 } 909 910 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 911 { 912 PetscFunctionBegin; 913 C->ops->productsymbolic = MatProductSymbolic_AB; 914 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 915 PetscFunctionReturn(0); 916 } 917 918 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 919 { 920 PetscFunctionBegin; 921 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 922 PetscFunctionReturn(0); 923 } 924 925 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 926 { 927 PetscFunctionBegin; 928 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 929 C->ops->productsymbolic = MatProductSymbolic_ABt; 930 PetscFunctionReturn(0); 931 } 932 933 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 934 { 935 Mat_Product *product = C->product; 936 Mat A = product->A; 937 PetscBool set, flag; 938 939 PetscFunctionBegin; 940 if (PetscDefined(USE_COMPLEX)) { 941 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 942 * We do this in several other locations in this file. This works for the time being, but these 943 * routines are considered unsafe and may be removed from the MatProduct code in the future. 944 * TODO: Add proper MATSEQAIJMKL implementations */ 945 C->ops->productsymbolic = NULL; 946 } else { 947 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 948 PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 949 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 950 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 951 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 952 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 953 } 954 PetscFunctionReturn(0); 955 } 956 957 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 958 { 959 PetscFunctionBegin; 960 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 961 PetscFunctionReturn(0); 962 } 963 964 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 965 { 966 PetscFunctionBegin; 967 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 972 { 973 Mat_Product *product = C->product; 974 975 PetscFunctionBegin; 976 switch (product->type) { 977 case MATPRODUCT_AB: 978 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 979 break; 980 case MATPRODUCT_AtB: 981 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 982 break; 983 case MATPRODUCT_ABt: 984 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 985 break; 986 case MATPRODUCT_PtAP: 987 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 988 break; 989 case MATPRODUCT_RARt: 990 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 991 break; 992 case MATPRODUCT_ABC: 993 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 994 break; 995 default: 996 break; 997 } 998 PetscFunctionReturn(0); 999 } 1000 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1001 /* ------------------------ End MatProduct code ------------------------ */ 1002 1003 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1004 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1005 * routine, but can also be used to convert an assembled SeqAIJ matrix 1006 * into a SeqAIJMKL one. */ 1007 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1008 { 1009 Mat B = *newmat; 1010 Mat_SeqAIJMKL *aijmkl; 1011 PetscBool set; 1012 PetscBool sametype; 1013 1014 PetscFunctionBegin; 1015 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 1016 1017 PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 1018 if (sametype) PetscFunctionReturn(0); 1019 1020 PetscCall(PetscNewLog(B,&aijmkl)); 1021 B->spptr = (void*) aijmkl; 1022 1023 /* Set function pointers for methods that we inherit from AIJ but override. 1024 * We also parse some command line options below, since those determine some of the methods we point to. */ 1025 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1026 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1027 B->ops->destroy = MatDestroy_SeqAIJMKL; 1028 1029 aijmkl->sparse_optimized = PETSC_FALSE; 1030 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1031 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1032 #else 1033 aijmkl->no_SpMV2 = PETSC_TRUE; 1034 #endif 1035 aijmkl->eager_inspection = PETSC_FALSE; 1036 1037 /* Parse command line options. */ 1038 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat"); 1039 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set)); 1040 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set)); 1041 PetscOptionsEnd(); 1042 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1043 if (!aijmkl->no_SpMV2) { 1044 PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1045 aijmkl->no_SpMV2 = PETSC_TRUE; 1046 } 1047 #endif 1048 1049 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1050 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1051 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1052 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1053 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1054 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1055 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1056 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1057 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1058 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1059 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1060 # if !defined(PETSC_USE_COMPLEX) 1061 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1062 # else 1063 B->ops->ptapnumeric = NULL; 1064 # endif 1065 # endif 1066 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1067 1068 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1069 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1070 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1071 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1072 * versions in which the older interface has not been deprecated, we use the old interface. */ 1073 if (aijmkl->no_SpMV2) { 1074 B->ops->mult = MatMult_SeqAIJMKL; 1075 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1076 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1077 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1078 } 1079 #endif 1080 1081 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ)); 1082 1083 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL)); 1084 *newmat = B; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /*@C 1089 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1090 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1091 routines from Intel MKL whenever possible. 1092 If the installed version of MKL supports the "SpMV2" sparse 1093 inspector-executor routines, then those are used by default. 1094 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1095 symmetric A) operations are currently supported. 1096 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1097 1098 Collective 1099 1100 Input Parameters: 1101 + comm - MPI communicator, set to PETSC_COMM_SELF 1102 . m - number of rows 1103 . n - number of columns 1104 . nz - number of nonzeros per row (same for all rows) 1105 - nnz - array containing the number of nonzeros in the various rows 1106 (possibly different for each row) or NULL 1107 1108 Output Parameter: 1109 . A - the matrix 1110 1111 Options Database Keys: 1112 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1113 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1114 1115 Notes: 1116 If nnz is given then nz is ignored 1117 1118 Level: intermediate 1119 1120 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1121 @*/ 1122 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1123 { 1124 PetscFunctionBegin; 1125 PetscCall(MatCreate(comm,A)); 1126 PetscCall(MatSetSizes(*A,m,n,m,n)); 1127 PetscCall(MatSetType(*A,MATSEQAIJMKL)); 1128 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz)); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1133 { 1134 PetscFunctionBegin; 1135 PetscCall(MatSetType(A,MATSEQAIJ)); 1136 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 1137 PetscFunctionReturn(0); 1138 } 1139