1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 Mat B = *newmat; 32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 33 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 34 #endif 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 53 54 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL)); 55 56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58 * simply involves destroying the MKL sparse matrix handle and then freeing 59 * the spptr pointer. */ 60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 61 62 if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 64 PetscCall(PetscFree(B->spptr)); 65 66 /* Change the type of B to MATSEQAIJ. */ 67 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 68 69 *newmat = B; 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 76 77 PetscFunctionBegin; 78 79 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 80 if (aijmkl) { 81 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 83 if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 85 PetscCall(PetscFree(A->spptr)); 86 } 87 88 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 89 * to destroy everything that remains. */ 90 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 91 /* Note that I don't call MatSetType(). I believe this is because that 92 * is only to be called when *building* a matrix. I could be wrong, but 93 * that is how things work for the SuperLU matrix class. */ 94 PetscCall(MatDestroy_SeqAIJ(A)); 95 PetscFunctionReturn(0); 96 } 97 98 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 99 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 100 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 101 * handle, creates a new one, and then calls mkl_sparse_optimize(). 102 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 103 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 104 * an unoptimized matrix handle here. */ 105 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 106 { 107 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 108 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 109 * does nothing. We make it callable anyway in this case because it cuts 110 * down on littering the code with #ifdefs. */ 111 PetscFunctionBegin; 112 PetscFunctionReturn(0); 113 #else 114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 115 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 116 PetscInt m,n; 117 MatScalar *aa; 118 PetscInt *aj,*ai; 119 120 PetscFunctionBegin; 121 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 122 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 123 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 124 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 125 * mkl_sparse_optimize() later. */ 126 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 127 #endif 128 129 if (aijmkl->sparse_optimized) { 130 /* Matrix has been previously assembled and optimized. Must destroy old 131 * matrix handle before running the optimization step again. */ 132 PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA); 133 } 134 aijmkl->sparse_optimized = PETSC_FALSE; 135 136 /* Now perform the SpMV2 setup and matrix optimization. */ 137 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 138 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 139 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 140 m = A->rmap->n; 141 n = A->cmap->n; 142 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 143 aa = a->a; /* Nonzero elements stored row-by-row. */ 144 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 145 if (a->nz && aa && !A->structure_only) { 146 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 147 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 148 PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 149 PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 150 PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 151 if (!aijmkl->no_SpMV2) { 152 PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA); 153 } 154 aijmkl->sparse_optimized = PETSC_TRUE; 155 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 156 } else { 157 aijmkl->csrA = PETSC_NULL; 158 } 159 160 PetscFunctionReturn(0); 161 #endif 162 } 163 164 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 165 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 166 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 167 { 168 sparse_index_base_t indexing; 169 PetscInt m,n; 170 PetscInt *aj,*ai,*dummy; 171 MatScalar *aa; 172 Mat_SeqAIJMKL *aijmkl; 173 174 PetscFunctionBegin; 175 if (csrA) { 176 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 177 PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 178 PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 179 } else { 180 aj = ai = PETSC_NULL; 181 aa = PETSC_NULL; 182 } 183 184 PetscCall(MatSetType(A,MATSEQAIJ)); 185 PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols)); 186 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 187 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 188 * they will be destroyed when the MKL handle is destroyed. 189 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 190 if (csrA) { 191 PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL)); 192 } else { 193 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 194 PetscCall(MatSetUp(A)); 195 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 196 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 197 } 198 199 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 200 * Now turn it into a MATSEQAIJMKL. */ 201 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 202 203 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 204 aijmkl->csrA = csrA; 205 206 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 207 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 208 * and just need to be able to run the MKL optimization step. */ 209 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 210 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 211 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 212 if (csrA) { 213 PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 214 PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 215 } 216 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 217 PetscFunctionReturn(0); 218 } 219 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 220 221 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 222 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 223 * MatMatMultNumeric(). */ 224 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 225 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 226 { 227 PetscInt i; 228 PetscInt nrows,ncols; 229 PetscInt nz; 230 PetscInt *ai,*aj,*dummy; 231 PetscScalar *aa; 232 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 233 sparse_index_base_t indexing; 234 235 PetscFunctionBegin; 236 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 237 if (!aijmkl->csrA) PetscFunctionReturn(0); 238 239 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 240 PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 241 242 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 243 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 244 for (i=0; i<nrows; i++) { 245 nz = ai[i+1] - ai[i]; 246 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES)); 247 } 248 249 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 250 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 251 252 PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state))); 253 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 254 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 255 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 256 aijmkl->sparse_optimized = PETSC_FALSE; 257 PetscFunctionReturn(0); 258 } 259 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 260 261 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 262 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 263 { 264 PetscInt i,j,k; 265 PetscInt nrows,ncols; 266 PetscInt nz; 267 PetscInt *ai,*aj,*dummy; 268 PetscScalar *aa; 269 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 270 sparse_index_base_t indexing; 271 272 PetscFunctionBegin; 273 PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 274 275 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 276 if (!aijmkl->csrA) { 277 PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n")); 278 PetscFunctionReturn(0); 279 } 280 281 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 282 PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 283 284 k = 0; 285 for (i=0; i<nrows; i++) { 286 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i)); 287 nz = ai[i+1] - ai[i]; 288 for (j=0; j<nz; j++) { 289 if (aa) { 290 PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g) ",aj[k],PetscRealPart(aa[k]))); 291 } else { 292 PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k])); 293 } 294 k++; 295 } 296 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 297 } 298 PetscFunctionReturn(0); 299 } 300 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 301 302 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 303 { 304 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 305 Mat_SeqAIJMKL *aijmkl_dest; 306 307 PetscFunctionBegin; 308 PetscCall(MatDuplicate_SeqAIJ(A,op,M)); 309 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 310 PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1)); 311 aijmkl_dest->sparse_optimized = PETSC_FALSE; 312 if (aijmkl->eager_inspection) { 313 PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 314 } 315 PetscFunctionReturn(0); 316 } 317 318 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 319 { 320 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 321 Mat_SeqAIJMKL *aijmkl; 322 323 PetscFunctionBegin; 324 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 325 326 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 327 * extra information and some different methods, call the AssemblyEnd 328 * routine for a MATSEQAIJ. 329 * I'm not sure if this is the best way to do this, but it avoids 330 * a lot of code duplication. */ 331 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 332 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 333 334 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 335 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 336 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 337 if (aijmkl->eager_inspection) { 338 PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 339 } 340 341 PetscFunctionReturn(0); 342 } 343 344 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 345 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 346 { 347 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 348 const PetscScalar *x; 349 PetscScalar *y; 350 const MatScalar *aa; 351 PetscInt m = A->rmap->n; 352 PetscInt n = A->cmap->n; 353 PetscScalar alpha = 1.0; 354 PetscScalar beta = 0.0; 355 const PetscInt *aj,*ai; 356 char matdescra[6]; 357 358 /* Variables not in MatMult_SeqAIJ. */ 359 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 360 361 PetscFunctionBegin; 362 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 363 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 364 PetscCall(VecGetArrayRead(xx,&x)); 365 PetscCall(VecGetArray(yy,&y)); 366 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 367 aa = a->a; /* Nonzero elements stored row-by-row. */ 368 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 369 370 /* Call MKL sparse BLAS routine to do the MatMult. */ 371 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 372 373 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 374 PetscCall(VecRestoreArrayRead(xx,&x)); 375 PetscCall(VecRestoreArray(yy,&y)); 376 PetscFunctionReturn(0); 377 } 378 #endif 379 380 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 381 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 382 { 383 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 384 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 385 const PetscScalar *x; 386 PetscScalar *y; 387 PetscObjectState state; 388 389 PetscFunctionBegin; 390 391 /* If there are no nonzero entries, zero yy and return immediately. */ 392 if (!a->nz) { 393 PetscCall(VecGetArray(yy,&y)); 394 PetscCall(PetscArrayzero(y,A->rmap->n)); 395 PetscCall(VecRestoreArray(yy,&y)); 396 PetscFunctionReturn(0); 397 } 398 399 PetscCall(VecGetArrayRead(xx,&x)); 400 PetscCall(VecGetArray(yy,&y)); 401 402 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 403 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 404 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 405 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 406 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 407 408 /* Call MKL SpMV2 executor routine to do the MatMult. */ 409 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 410 411 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 412 PetscCall(VecRestoreArrayRead(xx,&x)); 413 PetscCall(VecRestoreArray(yy,&y)); 414 PetscFunctionReturn(0); 415 } 416 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 417 418 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 419 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 420 { 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422 const PetscScalar *x; 423 PetscScalar *y; 424 const MatScalar *aa; 425 PetscInt m = A->rmap->n; 426 PetscInt n = A->cmap->n; 427 PetscScalar alpha = 1.0; 428 PetscScalar beta = 0.0; 429 const PetscInt *aj,*ai; 430 char matdescra[6]; 431 432 /* Variables not in MatMultTranspose_SeqAIJ. */ 433 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 434 435 PetscFunctionBegin; 436 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 437 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 438 PetscCall(VecGetArrayRead(xx,&x)); 439 PetscCall(VecGetArray(yy,&y)); 440 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 441 aa = a->a; /* Nonzero elements stored row-by-row. */ 442 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 443 444 /* Call MKL sparse BLAS routine to do the MatMult. */ 445 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 446 447 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 448 PetscCall(VecRestoreArrayRead(xx,&x)); 449 PetscCall(VecRestoreArray(yy,&y)); 450 PetscFunctionReturn(0); 451 } 452 #endif 453 454 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 455 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 456 { 457 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 458 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 459 const PetscScalar *x; 460 PetscScalar *y; 461 PetscObjectState state; 462 463 PetscFunctionBegin; 464 465 /* If there are no nonzero entries, zero yy and return immediately. */ 466 if (!a->nz) { 467 PetscCall(VecGetArray(yy,&y)); 468 PetscCall(PetscArrayzero(y,A->cmap->n)); 469 PetscCall(VecRestoreArray(yy,&y)); 470 PetscFunctionReturn(0); 471 } 472 473 PetscCall(VecGetArrayRead(xx,&x)); 474 PetscCall(VecGetArray(yy,&y)); 475 476 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 477 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 478 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 479 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 480 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 481 482 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 483 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 484 485 PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt)); 486 PetscCall(VecRestoreArrayRead(xx,&x)); 487 PetscCall(VecRestoreArray(yy,&y)); 488 PetscFunctionReturn(0); 489 } 490 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 491 492 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 493 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 494 { 495 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 496 const PetscScalar *x; 497 PetscScalar *y,*z; 498 const MatScalar *aa; 499 PetscInt m = A->rmap->n; 500 PetscInt n = A->cmap->n; 501 const PetscInt *aj,*ai; 502 PetscInt i; 503 504 /* Variables not in MatMultAdd_SeqAIJ. */ 505 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 506 PetscScalar alpha = 1.0; 507 PetscScalar beta; 508 char matdescra[6]; 509 510 PetscFunctionBegin; 511 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 512 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 513 514 PetscCall(VecGetArrayRead(xx,&x)); 515 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 516 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 517 aa = a->a; /* Nonzero elements stored row-by-row. */ 518 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 519 520 /* Call MKL sparse BLAS routine to do the MatMult. */ 521 if (zz == yy) { 522 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 523 beta = 1.0; 524 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 525 } else { 526 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 527 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 528 beta = 0.0; 529 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 530 for (i=0; i<m; i++) { 531 z[i] += y[i]; 532 } 533 } 534 535 PetscCall(PetscLogFlops(2.0*a->nz)); 536 PetscCall(VecRestoreArrayRead(xx,&x)); 537 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 538 PetscFunctionReturn(0); 539 } 540 #endif 541 542 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 543 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 544 { 545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 546 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 547 const PetscScalar *x; 548 PetscScalar *y,*z; 549 PetscInt m = A->rmap->n; 550 PetscInt i; 551 552 /* Variables not in MatMultAdd_SeqAIJ. */ 553 PetscObjectState state; 554 555 PetscFunctionBegin; 556 557 /* If there are no nonzero entries, set zz = yy and return immediately. */ 558 if (!a->nz) { 559 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 560 PetscCall(PetscArraycpy(z,y,m)); 561 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 562 PetscFunctionReturn(0); 563 } 564 565 PetscCall(VecGetArrayRead(xx,&x)); 566 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 567 568 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 569 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 570 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 571 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 572 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 573 574 /* Call MKL sparse BLAS routine to do the MatMult. */ 575 if (zz == yy) { 576 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 577 * with alpha and beta both set to 1.0. */ 578 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 579 } else { 580 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 581 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 582 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 583 for (i=0; i<m; i++) z[i] += y[i]; 584 } 585 586 PetscCall(PetscLogFlops(2.0*a->nz)); 587 PetscCall(VecRestoreArrayRead(xx,&x)); 588 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 589 PetscFunctionReturn(0); 590 } 591 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 592 593 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 594 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 595 { 596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597 const PetscScalar *x; 598 PetscScalar *y,*z; 599 const MatScalar *aa; 600 PetscInt m = A->rmap->n; 601 PetscInt n = A->cmap->n; 602 const PetscInt *aj,*ai; 603 PetscInt i; 604 605 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 606 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 607 PetscScalar alpha = 1.0; 608 PetscScalar beta; 609 char matdescra[6]; 610 611 PetscFunctionBegin; 612 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 613 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 614 615 PetscCall(VecGetArrayRead(xx,&x)); 616 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 617 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 618 aa = a->a; /* Nonzero elements stored row-by-row. */ 619 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 620 621 /* Call MKL sparse BLAS routine to do the MatMult. */ 622 if (zz == yy) { 623 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 624 beta = 1.0; 625 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 626 } else { 627 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 628 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 629 beta = 0.0; 630 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 631 for (i=0; i<n; i++) { 632 z[i] += y[i]; 633 } 634 } 635 636 PetscCall(PetscLogFlops(2.0*a->nz)); 637 PetscCall(VecRestoreArrayRead(xx,&x)); 638 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 639 PetscFunctionReturn(0); 640 } 641 #endif 642 643 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 644 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 645 { 646 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 647 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 648 const PetscScalar *x; 649 PetscScalar *y,*z; 650 PetscInt n = A->cmap->n; 651 PetscInt i; 652 PetscObjectState state; 653 654 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 655 656 PetscFunctionBegin; 657 658 /* If there are no nonzero entries, set zz = yy and return immediately. */ 659 if (!a->nz) { 660 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 661 PetscCall(PetscArraycpy(z,y,n)); 662 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 663 PetscFunctionReturn(0); 664 } 665 666 PetscCall(VecGetArrayRead(xx,&x)); 667 PetscCall(VecGetArrayPair(yy,zz,&y,&z)); 668 669 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 670 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 671 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 672 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 673 if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A); 674 675 /* Call MKL sparse BLAS routine to do the MatMult. */ 676 if (zz == yy) { 677 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 678 * with alpha and beta both set to 1.0. */ 679 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 680 } else { 681 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 682 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 683 PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 684 for (i=0; i<n; i++) z[i] += y[i]; 685 } 686 687 PetscCall(PetscLogFlops(2.0*a->nz)); 688 PetscCall(VecRestoreArrayRead(xx,&x)); 689 PetscCall(VecRestoreArrayPair(yy,zz,&y,&z)); 690 PetscFunctionReturn(0); 691 } 692 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 693 694 /* -------------------------- MatProduct code -------------------------- */ 695 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 696 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 697 { 698 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 699 sparse_matrix_t csrA,csrB,csrC; 700 PetscInt nrows,ncols; 701 struct matrix_descr descr_type_gen; 702 PetscObjectState state; 703 704 PetscFunctionBegin; 705 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 706 * not handle sparse matrices with zero rows or columns. */ 707 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 708 else nrows = A->cmap->N; 709 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 710 else ncols = B->rmap->N; 711 712 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 713 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 714 PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 715 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 716 csrA = a->csrA; 717 csrB = b->csrA; 718 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 719 720 if (csrA && csrB) { 721 PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 722 } else { 723 csrC = PETSC_NULL; 724 } 725 726 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C)); 727 728 PetscFunctionReturn(0); 729 } 730 731 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 732 { 733 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 734 sparse_matrix_t csrA, csrB, csrC; 735 struct matrix_descr descr_type_gen; 736 PetscObjectState state; 737 738 PetscFunctionBegin; 739 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 740 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 741 PetscCall(PetscObjectStateGet((PetscObject)B,&state)); 742 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 743 csrA = a->csrA; 744 csrB = b->csrA; 745 csrC = c->csrA; 746 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 747 748 if (csrA && csrB) { 749 PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 750 } else { 751 csrC = PETSC_NULL; 752 } 753 754 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 755 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 756 757 PetscFunctionReturn(0); 758 } 759 760 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 761 { 762 PetscFunctionBegin; 763 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 768 { 769 PetscFunctionBegin; 770 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 771 PetscFunctionReturn(0); 772 } 773 774 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 775 { 776 PetscFunctionBegin; 777 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 778 PetscFunctionReturn(0); 779 } 780 781 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 782 { 783 PetscFunctionBegin; 784 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C)); 785 PetscFunctionReturn(0); 786 } 787 788 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 789 { 790 PetscFunctionBegin; 791 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 792 PetscFunctionReturn(0); 793 } 794 795 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 796 { 797 PetscFunctionBegin; 798 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C)); 799 PetscFunctionReturn(0); 800 } 801 802 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 803 { 804 Mat_Product *product = C->product; 805 Mat A = product->A,B = product->B; 806 807 PetscFunctionBegin; 808 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C)); 809 PetscFunctionReturn(0); 810 } 811 812 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 813 { 814 Mat_Product *product = C->product; 815 Mat A = product->A,B = product->B; 816 PetscReal fill = product->fill; 817 818 PetscFunctionBegin; 819 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C)); 820 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 825 { 826 Mat Ct; 827 Vec zeros; 828 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 829 sparse_matrix_t csrA, csrP, csrC; 830 PetscBool set, flag; 831 struct matrix_descr descr_type_sym; 832 PetscObjectState state; 833 834 PetscFunctionBegin; 835 PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 836 PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 837 838 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 839 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 840 PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 841 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 842 csrA = a->csrA; 843 csrP = p->csrA; 844 csrC = c->csrA; 845 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 846 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 847 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 848 849 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 850 PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 851 852 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 853 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 854 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 855 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 856 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 857 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 858 * the full matrix. */ 859 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 860 PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct)); 861 PetscCall(MatCreateVecs(C,&zeros,NULL)); 862 PetscCall(VecSetFromOptions(zeros)); 863 PetscCall(VecZeroEntries(zeros)); 864 PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES)); 865 PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN)); 866 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 867 PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C)); 868 PetscCall(MatProductSetType(C,MATPRODUCT_PtAP)); 869 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 870 PetscCall(VecDestroy(&zeros)); 871 PetscCall(MatDestroy(&Ct)); 872 PetscFunctionReturn(0); 873 } 874 875 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 876 { 877 Mat_Product *product = C->product; 878 Mat A = product->A,P = product->B; 879 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 880 sparse_matrix_t csrA,csrP,csrC; 881 struct matrix_descr descr_type_sym; 882 PetscObjectState state; 883 884 PetscFunctionBegin; 885 PetscCall(PetscObjectStateGet((PetscObject)A,&state)); 886 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 887 PetscCall(PetscObjectStateGet((PetscObject)P,&state)); 888 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 889 csrA = a->csrA; 890 csrP = p->csrA; 891 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 892 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 893 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 894 895 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 896 if (csrP && csrA) { 897 PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 898 } else { 899 csrC = PETSC_NULL; 900 } 901 902 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 903 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 904 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 905 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 906 * is guaranteed. */ 907 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C)); 908 909 C->ops->productnumeric = MatProductNumeric_PtAP; 910 PetscFunctionReturn(0); 911 } 912 913 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 914 { 915 PetscFunctionBegin; 916 C->ops->productsymbolic = MatProductSymbolic_AB; 917 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 918 PetscFunctionReturn(0); 919 } 920 921 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 922 { 923 PetscFunctionBegin; 924 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 925 PetscFunctionReturn(0); 926 } 927 928 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 929 { 930 PetscFunctionBegin; 931 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 932 C->ops->productsymbolic = MatProductSymbolic_ABt; 933 PetscFunctionReturn(0); 934 } 935 936 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 937 { 938 Mat_Product *product = C->product; 939 Mat A = product->A; 940 PetscBool set, flag; 941 942 PetscFunctionBegin; 943 if (PetscDefined(USE_COMPLEX)) { 944 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 945 * We do this in several other locations in this file. This works for the time being, but these 946 * routines are considered unsafe and may be removed from the MatProduct code in the future. 947 * TODO: Add proper MATSEQAIJMKL implementations */ 948 C->ops->productsymbolic = NULL; 949 } else { 950 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 951 PetscCall(MatIsSymmetricKnown(A,&set,&flag)); 952 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 953 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 954 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 955 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 956 } 957 PetscFunctionReturn(0); 958 } 959 960 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 961 { 962 PetscFunctionBegin; 963 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 964 PetscFunctionReturn(0); 965 } 966 967 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 968 { 969 PetscFunctionBegin; 970 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 971 PetscFunctionReturn(0); 972 } 973 974 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 975 { 976 Mat_Product *product = C->product; 977 978 PetscFunctionBegin; 979 switch (product->type) { 980 case MATPRODUCT_AB: 981 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 982 break; 983 case MATPRODUCT_AtB: 984 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 985 break; 986 case MATPRODUCT_ABt: 987 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 988 break; 989 case MATPRODUCT_PtAP: 990 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 991 break; 992 case MATPRODUCT_RARt: 993 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 994 break; 995 case MATPRODUCT_ABC: 996 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 997 break; 998 default: 999 break; 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1004 /* ------------------------ End MatProduct code ------------------------ */ 1005 1006 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1007 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1008 * routine, but can also be used to convert an assembled SeqAIJ matrix 1009 * into a SeqAIJMKL one. */ 1010 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1011 { 1012 Mat B = *newmat; 1013 Mat_SeqAIJMKL *aijmkl; 1014 PetscBool set; 1015 PetscBool sametype; 1016 PetscErrorCode ierr; 1017 1018 PetscFunctionBegin; 1019 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B)); 1020 1021 PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype)); 1022 if (sametype) PetscFunctionReturn(0); 1023 1024 PetscCall(PetscNewLog(B,&aijmkl)); 1025 B->spptr = (void*) aijmkl; 1026 1027 /* Set function pointers for methods that we inherit from AIJ but override. 1028 * We also parse some command line options below, since those determine some of the methods we point to. */ 1029 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1030 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1031 B->ops->destroy = MatDestroy_SeqAIJMKL; 1032 1033 aijmkl->sparse_optimized = PETSC_FALSE; 1034 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1035 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1036 #else 1037 aijmkl->no_SpMV2 = PETSC_TRUE; 1038 #endif 1039 aijmkl->eager_inspection = PETSC_FALSE; 1040 1041 /* Parse command line options. */ 1042 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");PetscCall(ierr); 1043 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set)); 1044 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set)); 1045 ierr = PetscOptionsEnd();PetscCall(ierr); 1046 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1047 if (!aijmkl->no_SpMV2) { 1048 PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1049 aijmkl->no_SpMV2 = PETSC_TRUE; 1050 } 1051 #endif 1052 1053 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1054 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1055 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1056 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1057 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1058 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1059 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1060 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1061 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1062 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1063 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1064 # if !defined(PETSC_USE_COMPLEX) 1065 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1066 # else 1067 B->ops->ptapnumeric = NULL; 1068 # endif 1069 # endif 1070 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1071 1072 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1073 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1074 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1075 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1076 * versions in which the older interface has not been deprecated, we use the old interface. */ 1077 if (aijmkl->no_SpMV2) { 1078 B->ops->mult = MatMult_SeqAIJMKL; 1079 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1080 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1081 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1082 } 1083 #endif 1084 1085 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ)); 1086 1087 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL)); 1088 *newmat = B; 1089 PetscFunctionReturn(0); 1090 } 1091 1092 /*@C 1093 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1094 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1095 routines from Intel MKL whenever possible. 1096 If the installed version of MKL supports the "SpMV2" sparse 1097 inspector-executor routines, then those are used by default. 1098 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1099 symmetric A) operations are currently supported. 1100 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1101 1102 Collective 1103 1104 Input Parameters: 1105 + comm - MPI communicator, set to PETSC_COMM_SELF 1106 . m - number of rows 1107 . n - number of columns 1108 . nz - number of nonzeros per row (same for all rows) 1109 - nnz - array containing the number of nonzeros in the various rows 1110 (possibly different for each row) or NULL 1111 1112 Output Parameter: 1113 . A - the matrix 1114 1115 Options Database Keys: 1116 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1117 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1118 1119 Notes: 1120 If nnz is given then nz is ignored 1121 1122 Level: intermediate 1123 1124 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1125 @*/ 1126 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1127 { 1128 PetscFunctionBegin; 1129 PetscCall(MatCreate(comm,A)); 1130 PetscCall(MatSetSizes(*A,m,n,m,n)); 1131 PetscCall(MatSetType(*A,MATSEQAIJMKL)); 1132 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz)); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1137 { 1138 PetscFunctionBegin; 1139 PetscCall(MatSetType(A,MATSEQAIJ)); 1140 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A)); 1141 PetscFunctionReturn(0); 1142 } 1143