1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 61 * simply involves destroying the MKL sparse matrix handle and then freeing 62 * the spptr pointer. */ 63 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 64 65 if (aijmkl->sparse_optimized) { 66 sparse_status_t stat; 67 stat = mkl_sparse_destroy(aijmkl->csrA); 68 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 69 } 70 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 71 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 72 73 /* Change the type of B to MATSEQAIJ. */ 74 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 75 76 *newmat = B; 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 84 85 PetscFunctionBegin; 86 87 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 95 } 96 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 97 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 98 } 99 100 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 101 * to destroy everything that remains. */ 102 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 103 /* Note that I don't call MatSetType(). I believe this is because that 104 * is only to be called when *building* a matrix. I could be wrong, but 105 * that is how things work for the SuperLU matrix class. */ 106 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 111 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 112 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 113 * handle, creates a new one, and then calls mkl_sparse_optimize(). 114 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 115 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 116 * an unoptimized matrix handle here. */ 117 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 118 { 119 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 120 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 121 * does nothing. We make it callable anyway in this case because it cuts 122 * down on littering the code with #ifdefs. */ 123 PetscFunctionBegin; 124 PetscFunctionReturn(0); 125 #else 126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 127 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 128 PetscInt m,n; 129 MatScalar *aa; 130 PetscInt *aj,*ai; 131 sparse_status_t stat; 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 136 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 137 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 138 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 139 * mkl_sparse_optimize() later. */ 140 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 141 #endif 142 143 if (aijmkl->sparse_optimized) { 144 /* Matrix has been previously assembled and optimized. Must destroy old 145 * matrix handle before running the optimization step again. */ 146 stat = mkl_sparse_destroy(aijmkl->csrA); 147 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 148 } 149 aijmkl->sparse_optimized = PETSC_FALSE; 150 151 /* Now perform the SpMV2 setup and matrix optimization. */ 152 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 153 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 154 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 155 m = A->rmap->n; 156 n = A->cmap->n; 157 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 158 aa = a->a; /* Nonzero elements stored row-by-row. */ 159 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 160 if (a->nz && aa && !A->structure_only) { 161 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 162 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 163 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 164 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()"); 165 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 166 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 167 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 168 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 169 if (!aijmkl->no_SpMV2) { 170 stat = mkl_sparse_optimize(aijmkl->csrA); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()"); 172 } 173 aijmkl->sparse_optimized = PETSC_TRUE; 174 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 175 } else { 176 aijmkl->csrA = PETSC_NULL; 177 } 178 179 PetscFunctionReturn(0); 180 #endif 181 } 182 183 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 184 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 185 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 186 { 187 PetscErrorCode ierr; 188 sparse_status_t stat; 189 sparse_index_base_t indexing; 190 PetscInt m,n; 191 PetscInt *aj,*ai,*dummy; 192 MatScalar *aa; 193 Mat_SeqAIJMKL *aijmkl; 194 195 if (csrA) { 196 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 197 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 198 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 199 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 200 } else { 201 aj = ai = PETSC_NULL; 202 aa = PETSC_NULL; 203 } 204 205 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 206 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 207 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 208 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 209 * they will be destroyed when the MKL handle is destroyed. 210 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 211 if (csrA) { 212 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 213 } else { 214 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 215 ierr = MatSetUp(A);CHKERRQ(ierr); 216 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 217 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 218 } 219 220 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 221 * Now turn it into a MATSEQAIJMKL. */ 222 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 223 224 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 225 aijmkl->csrA = csrA; 226 227 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 228 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 229 * and just need to be able to run the MKL optimization step. */ 230 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 231 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 232 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 233 if (csrA) { 234 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 235 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 236 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 237 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 238 } 239 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 243 244 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 245 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 246 * MatMatMultNumeric(). */ 247 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 248 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 249 { 250 PetscInt i; 251 PetscInt nrows,ncols; 252 PetscInt nz; 253 PetscInt *ai,*aj,*dummy; 254 PetscScalar *aa; 255 PetscErrorCode ierr; 256 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 257 sparse_status_t stat; 258 sparse_index_base_t indexing; 259 260 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 261 if (!aijmkl->csrA) PetscFunctionReturn(0); 262 263 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 264 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 265 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 266 267 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 268 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 269 for (i=0; i<nrows; i++) { 270 nz = ai[i+1] - ai[i]; 271 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 272 } 273 274 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276 277 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 278 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 279 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 280 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 281 aijmkl->sparse_optimized = PETSC_FALSE; 282 PetscFunctionReturn(0); 283 } 284 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 285 286 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 287 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 288 { 289 PetscInt i,j,k; 290 PetscInt nrows,ncols; 291 PetscInt nz; 292 PetscInt *ai,*aj,*dummy; 293 PetscScalar *aa; 294 PetscErrorCode ierr; 295 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 296 sparse_status_t stat; 297 sparse_index_base_t indexing; 298 299 ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 300 301 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 302 if (!aijmkl->csrA) { 303 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 308 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 309 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 310 311 k = 0; 312 for (i=0; i<nrows; i++) { 313 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 314 nz = ai[i+1] - ai[i]; 315 for (j=0; j<nz; j++) { 316 if (aa) { 317 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr); 318 } else { 319 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 320 } 321 k++; 322 } 323 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 324 } 325 PetscFunctionReturn(0); 326 } 327 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 328 329 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 330 { 331 PetscErrorCode ierr; 332 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 333 Mat_SeqAIJMKL *aijmkl_dest; 334 335 PetscFunctionBegin; 336 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 337 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 338 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 339 aijmkl_dest->sparse_optimized = PETSC_FALSE; 340 if (aijmkl->eager_inspection) { 341 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 347 { 348 PetscErrorCode ierr; 349 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 350 Mat_SeqAIJMKL *aijmkl; 351 352 PetscFunctionBegin; 353 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 354 355 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 356 * extra information and some different methods, call the AssemblyEnd 357 * routine for a MATSEQAIJ. 358 * I'm not sure if this is the best way to do this, but it avoids 359 * a lot of code duplication. */ 360 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 361 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 362 363 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 364 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 365 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 366 if (aijmkl->eager_inspection) { 367 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 368 } 369 370 PetscFunctionReturn(0); 371 } 372 373 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 374 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 375 { 376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 377 const PetscScalar *x; 378 PetscScalar *y; 379 const MatScalar *aa; 380 PetscErrorCode ierr; 381 PetscInt m = A->rmap->n; 382 PetscInt n = A->cmap->n; 383 PetscScalar alpha = 1.0; 384 PetscScalar beta = 0.0; 385 const PetscInt *aj,*ai; 386 char matdescra[6]; 387 388 389 /* Variables not in MatMult_SeqAIJ. */ 390 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 391 392 PetscFunctionBegin; 393 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 394 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 395 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 396 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 397 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 398 aa = a->a; /* Nonzero elements stored row-by-row. */ 399 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 400 401 /* Call MKL sparse BLAS routine to do the MatMult. */ 402 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 403 404 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 406 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 #endif 410 411 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 412 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 413 { 414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 415 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 416 const PetscScalar *x; 417 PetscScalar *y; 418 PetscErrorCode ierr; 419 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 420 PetscObjectState state; 421 422 PetscFunctionBegin; 423 424 /* If there are no nonzero entries, zero yy and return immediately. */ 425 if (!a->nz) { 426 PetscInt i; 427 PetscInt m=A->rmap->n; 428 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 429 for (i=0; i<m; i++) { 430 y[i] = 0.0; 431 } 432 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 436 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 437 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 438 439 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 440 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 441 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 442 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 443 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 444 MatSeqAIJMKL_create_mkl_handle(A); 445 } 446 447 /* Call MKL SpMV2 executor routine to do the MatMult. */ 448 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 449 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 450 451 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 452 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 453 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 457 458 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 459 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 460 { 461 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 462 const PetscScalar *x; 463 PetscScalar *y; 464 const MatScalar *aa; 465 PetscErrorCode ierr; 466 PetscInt m = A->rmap->n; 467 PetscInt n = A->cmap->n; 468 PetscScalar alpha = 1.0; 469 PetscScalar beta = 0.0; 470 const PetscInt *aj,*ai; 471 char matdescra[6]; 472 473 /* Variables not in MatMultTranspose_SeqAIJ. */ 474 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 475 476 PetscFunctionBegin; 477 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 478 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 479 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 481 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 482 aa = a->a; /* Nonzero elements stored row-by-row. */ 483 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 484 485 /* Call MKL sparse BLAS routine to do the MatMult. */ 486 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 487 488 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 490 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 491 PetscFunctionReturn(0); 492 } 493 #endif 494 495 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 496 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 497 { 498 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 499 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 500 const PetscScalar *x; 501 PetscScalar *y; 502 PetscErrorCode ierr; 503 sparse_status_t stat; 504 PetscObjectState state; 505 506 PetscFunctionBegin; 507 508 /* If there are no nonzero entries, zero yy and return immediately. */ 509 if (!a->nz) { 510 PetscInt i; 511 PetscInt n=A->cmap->n; 512 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 513 for (i=0; i<n; i++) { 514 y[i] = 0.0; 515 } 516 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 517 PetscFunctionReturn(0); 518 } 519 520 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 521 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 522 523 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 524 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 525 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 526 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 527 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 528 MatSeqAIJMKL_create_mkl_handle(A); 529 } 530 531 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 532 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 533 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 534 535 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 536 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 537 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 541 542 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 543 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 544 { 545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 546 const PetscScalar *x; 547 PetscScalar *y,*z; 548 const MatScalar *aa; 549 PetscErrorCode ierr; 550 PetscInt m = A->rmap->n; 551 PetscInt n = A->cmap->n; 552 const PetscInt *aj,*ai; 553 PetscInt i; 554 555 /* Variables not in MatMultAdd_SeqAIJ. */ 556 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 557 PetscScalar alpha = 1.0; 558 PetscScalar beta; 559 char matdescra[6]; 560 561 PetscFunctionBegin; 562 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 563 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 564 565 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 566 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 567 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 568 aa = a->a; /* Nonzero elements stored row-by-row. */ 569 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 570 571 /* Call MKL sparse BLAS routine to do the MatMult. */ 572 if (zz == yy) { 573 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 574 beta = 1.0; 575 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 576 } else { 577 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 578 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 579 beta = 0.0; 580 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 581 for (i=0; i<m; i++) { 582 z[i] += y[i]; 583 } 584 } 585 586 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 587 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 588 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 589 PetscFunctionReturn(0); 590 } 591 #endif 592 593 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 594 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 595 { 596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 597 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 598 const PetscScalar *x; 599 PetscScalar *y,*z; 600 PetscErrorCode ierr; 601 PetscInt m = A->rmap->n; 602 PetscInt i; 603 604 /* Variables not in MatMultAdd_SeqAIJ. */ 605 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 606 PetscObjectState state; 607 608 PetscFunctionBegin; 609 610 /* If there are no nonzero entries, set zz = yy and return immediately. */ 611 if (!a->nz) { 612 PetscInt i; 613 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 614 for (i=0; i<m; i++) { 615 z[i] = y[i]; 616 } 617 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } 620 621 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 622 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 623 624 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 625 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 626 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 627 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 628 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 629 MatSeqAIJMKL_create_mkl_handle(A); 630 } 631 632 /* Call MKL sparse BLAS routine to do the MatMult. */ 633 if (zz == yy) { 634 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 635 * with alpha and beta both set to 1.0. */ 636 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 637 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 638 } else { 639 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 640 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 641 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 642 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 643 for (i=0; i<m; i++) { 644 z[i] += y[i]; 645 } 646 } 647 648 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 649 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 650 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 654 655 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 656 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 659 const PetscScalar *x; 660 PetscScalar *y,*z; 661 const MatScalar *aa; 662 PetscErrorCode ierr; 663 PetscInt m = A->rmap->n; 664 PetscInt n = A->cmap->n; 665 const PetscInt *aj,*ai; 666 PetscInt i; 667 668 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 669 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 670 PetscScalar alpha = 1.0; 671 PetscScalar beta; 672 char matdescra[6]; 673 674 PetscFunctionBegin; 675 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 676 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 677 678 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 679 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 680 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 681 aa = a->a; /* Nonzero elements stored row-by-row. */ 682 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 683 684 /* Call MKL sparse BLAS routine to do the MatMult. */ 685 if (zz == yy) { 686 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 687 beta = 1.0; 688 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 689 } else { 690 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 691 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 692 beta = 0.0; 693 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 694 for (i=0; i<n; i++) { 695 z[i] += y[i]; 696 } 697 } 698 699 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 700 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 701 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 #endif 705 706 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 707 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 708 { 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 711 const PetscScalar *x; 712 PetscScalar *y,*z; 713 PetscErrorCode ierr; 714 PetscInt n = A->cmap->n; 715 PetscInt i; 716 PetscObjectState state; 717 718 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 719 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 720 721 PetscFunctionBegin; 722 723 /* If there are no nonzero entries, set zz = yy and return immediately. */ 724 if (!a->nz) { 725 PetscInt i; 726 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 727 for (i=0; i<n; i++) { 728 z[i] = y[i]; 729 } 730 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 734 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 735 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 736 737 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 738 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 739 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 740 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 741 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 742 MatSeqAIJMKL_create_mkl_handle(A); 743 } 744 745 /* Call MKL sparse BLAS routine to do the MatMult. */ 746 if (zz == yy) { 747 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 748 * with alpha and beta both set to 1.0. */ 749 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 750 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 751 } else { 752 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 753 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 754 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 755 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 756 for (i=0; i<n; i++) { 757 z[i] += y[i]; 758 } 759 } 760 761 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 762 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 763 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 767 768 /* -------------------------- MatProduct code -------------------------- */ 769 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 770 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 771 { 772 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 773 sparse_matrix_t csrA,csrB,csrC; 774 PetscInt nrows,ncols; 775 PetscErrorCode ierr; 776 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 777 struct matrix_descr descr_type_gen; 778 PetscObjectState state; 779 780 PetscFunctionBegin; 781 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 782 * not handle sparse matrices with zero rows or columns. */ 783 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 784 else nrows = A->cmap->N; 785 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 786 else ncols = B->rmap->N; 787 788 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 789 if (!a->sparse_optimized || a->state != state) { 790 MatSeqAIJMKL_create_mkl_handle(A); 791 } 792 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 793 if (!b->sparse_optimized || b->state != state) { 794 MatSeqAIJMKL_create_mkl_handle(B); 795 } 796 csrA = a->csrA; 797 csrB = b->csrA; 798 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 799 800 if (csrA && csrB) { 801 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 802 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 803 } else { 804 csrC = PETSC_NULL; 805 } 806 807 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 808 809 PetscFunctionReturn(0); 810 } 811 812 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 813 { 814 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 815 sparse_matrix_t csrA, csrB, csrC; 816 PetscErrorCode ierr; 817 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 818 struct matrix_descr descr_type_gen; 819 PetscObjectState state; 820 821 PetscFunctionBegin; 822 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 823 if (!a->sparse_optimized || a->state != state) { 824 MatSeqAIJMKL_create_mkl_handle(A); 825 } 826 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 827 if (!b->sparse_optimized || b->state != state) { 828 MatSeqAIJMKL_create_mkl_handle(B); 829 } 830 csrA = a->csrA; 831 csrB = b->csrA; 832 csrC = c->csrA; 833 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 834 835 if (csrA && csrB) { 836 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 837 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 838 } else { 839 csrC = PETSC_NULL; 840 } 841 842 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 843 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 844 845 PetscFunctionReturn(0); 846 } 847 848 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 849 { 850 PetscErrorCode ierr; 851 852 PetscFunctionBegin; 853 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 858 { 859 PetscErrorCode ierr; 860 861 PetscFunctionBegin; 862 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 867 { 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 876 { 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 885 { 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 894 { 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 903 { 904 PetscErrorCode ierr; 905 Mat_Product *product = C->product; 906 Mat A = product->A,B = product->B; 907 908 PetscFunctionBegin; 909 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 914 { 915 PetscErrorCode ierr; 916 Mat_Product *product = C->product; 917 Mat A = product->A,B = product->B; 918 PetscReal fill = product->fill; 919 920 PetscFunctionBegin; 921 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 922 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 923 PetscFunctionReturn(0); 924 } 925 926 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 927 { 928 Mat Ct; 929 Vec zeros; 930 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 931 sparse_matrix_t csrA, csrP, csrC; 932 PetscBool set, flag; 933 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 934 struct matrix_descr descr_type_sym; 935 PetscObjectState state; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 ierr = MatIsSymmetricKnown(A,&set,&flag); 940 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 941 942 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 943 if (!a->sparse_optimized || a->state != state) { 944 MatSeqAIJMKL_create_mkl_handle(A); 945 } 946 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 947 if (!p->sparse_optimized || p->state != state) { 948 MatSeqAIJMKL_create_mkl_handle(P); 949 } 950 csrA = a->csrA; 951 csrP = p->csrA; 952 csrC = c->csrA; 953 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 954 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 955 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 956 957 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 958 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 959 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 960 961 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 962 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 963 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 964 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 965 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 966 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 967 * the full matrix. */ 968 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 969 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 970 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 971 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 972 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 973 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 974 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 975 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 976 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 977 ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 978 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 979 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 980 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 985 { 986 Mat_Product *product = C->product; 987 Mat A = product->A,P = product->B; 988 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 989 sparse_matrix_t csrA,csrP,csrC; 990 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 991 struct matrix_descr descr_type_sym; 992 PetscObjectState state; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 997 if (!a->sparse_optimized || a->state != state) { 998 MatSeqAIJMKL_create_mkl_handle(A); 999 } 1000 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1001 if (!p->sparse_optimized || p->state != state) { 1002 MatSeqAIJMKL_create_mkl_handle(P); 1003 } 1004 csrA = a->csrA; 1005 csrP = p->csrA; 1006 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1007 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1008 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1009 1010 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1011 if (csrP && csrA) { 1012 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1013 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1014 } else { 1015 csrC = PETSC_NULL; 1016 } 1017 1018 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1019 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1020 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 1021 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 1022 * is guaranteed. */ 1023 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1024 1025 C->ops->productnumeric = MatProductNumeric_PtAP; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1030 { 1031 PetscFunctionBegin; 1032 C->ops->productsymbolic = MatProductSymbolic_AB; 1033 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1034 PetscFunctionReturn(0); 1035 } 1036 1037 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1038 { 1039 PetscFunctionBegin; 1040 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1041 PetscFunctionReturn(0); 1042 } 1043 1044 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1045 { 1046 PetscFunctionBegin; 1047 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1048 C->ops->productsymbolic = MatProductSymbolic_ABt; 1049 PetscFunctionReturn(0); 1050 } 1051 1052 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1053 { 1054 PetscErrorCode ierr; 1055 Mat_Product *product = C->product; 1056 Mat A = product->A; 1057 PetscBool set, flag; 1058 1059 PetscFunctionBegin; 1060 #if defined(PETSC_USE_COMPLEX) 1061 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 1062 * We do this in several other locations in this file. This works for the time being, but these 1063 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1064 * TODO: Add proper MATSEQAIJMKL implementations */ 1065 C->ops->productsymbolic = NULL; 1066 #else 1067 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1068 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1069 if (set && flag) { 1070 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1071 PetscFunctionReturn(0); 1072 } else { 1073 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1074 } 1075 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1076 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1077 #endif 1078 PetscFunctionReturn(0); 1079 } 1080 1081 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1082 { 1083 PetscFunctionBegin; 1084 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1085 PetscFunctionReturn(0); 1086 } 1087 1088 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1089 { 1090 PetscFunctionBegin; 1091 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1096 { 1097 PetscErrorCode ierr; 1098 Mat_Product *product = C->product; 1099 1100 PetscFunctionBegin; 1101 switch (product->type) { 1102 case MATPRODUCT_AB: 1103 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1104 break; 1105 case MATPRODUCT_AtB: 1106 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1107 break; 1108 case MATPRODUCT_ABt: 1109 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1110 break; 1111 case MATPRODUCT_PtAP: 1112 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1113 break; 1114 case MATPRODUCT_RARt: 1115 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1116 break; 1117 case MATPRODUCT_ABC: 1118 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1119 break; 1120 default: 1121 break; 1122 } 1123 PetscFunctionReturn(0); 1124 } 1125 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1126 /* ------------------------ End MatProduct code ------------------------ */ 1127 1128 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1129 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1130 * routine, but can also be used to convert an assembled SeqAIJ matrix 1131 * into a SeqAIJMKL one. */ 1132 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1133 { 1134 PetscErrorCode ierr; 1135 Mat B = *newmat; 1136 Mat_SeqAIJMKL *aijmkl; 1137 PetscBool set; 1138 PetscBool sametype; 1139 1140 PetscFunctionBegin; 1141 if (reuse == MAT_INITIAL_MATRIX) { 1142 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1143 } 1144 1145 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1146 if (sametype) PetscFunctionReturn(0); 1147 1148 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1149 B->spptr = (void*) aijmkl; 1150 1151 /* Set function pointers for methods that we inherit from AIJ but override. 1152 * We also parse some command line options below, since those determine some of the methods we point to. */ 1153 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1154 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1155 B->ops->destroy = MatDestroy_SeqAIJMKL; 1156 1157 aijmkl->sparse_optimized = PETSC_FALSE; 1158 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1159 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1160 #else 1161 aijmkl->no_SpMV2 = PETSC_TRUE; 1162 #endif 1163 aijmkl->eager_inspection = PETSC_FALSE; 1164 1165 /* Parse command line options. */ 1166 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1167 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1168 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1169 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1170 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1171 if (!aijmkl->no_SpMV2) { 1172 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1173 aijmkl->no_SpMV2 = PETSC_TRUE; 1174 } 1175 #endif 1176 1177 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1178 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1179 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1180 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1181 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1182 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1183 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1184 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1185 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1186 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1187 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1188 # if !defined(PETSC_USE_COMPLEX) 1189 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1190 # else 1191 B->ops->ptapnumeric = NULL; 1192 # endif 1193 # endif 1194 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1195 1196 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1197 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1198 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1199 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1200 * versions in which the older interface has not been deprecated, we use the old interface. */ 1201 if (aijmkl->no_SpMV2) { 1202 B->ops->mult = MatMult_SeqAIJMKL; 1203 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1204 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1205 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1206 } 1207 #endif 1208 1209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1210 1211 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1212 *newmat = B; 1213 PetscFunctionReturn(0); 1214 } 1215 1216 /*@C 1217 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1218 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1219 routines from Intel MKL whenever possible. 1220 If the installed version of MKL supports the "SpMV2" sparse 1221 inspector-executor routines, then those are used by default. 1222 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1223 symmetric A) operations are currently supported. 1224 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1225 1226 Collective 1227 1228 Input Parameters: 1229 + comm - MPI communicator, set to PETSC_COMM_SELF 1230 . m - number of rows 1231 . n - number of columns 1232 . nz - number of nonzeros per row (same for all rows) 1233 - nnz - array containing the number of nonzeros in the various rows 1234 (possibly different for each row) or NULL 1235 1236 Output Parameter: 1237 . A - the matrix 1238 1239 Options Database Keys: 1240 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1241 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1242 1243 Notes: 1244 If nnz is given then nz is ignored 1245 1246 Level: intermediate 1247 1248 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1249 @*/ 1250 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1251 { 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1256 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1257 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1258 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1263 { 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1268 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271