1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 61 * simply involves destroying the MKL sparse matrix handle and then freeing 62 * the spptr pointer. */ 63 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 64 65 if (aijmkl->sparse_optimized) { 66 sparse_status_t stat; 67 stat = mkl_sparse_destroy(aijmkl->csrA); 68 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 69 } 70 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 71 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 72 73 /* Change the type of B to MATSEQAIJ. */ 74 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 75 76 *newmat = B; 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 84 85 PetscFunctionBegin; 86 87 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 95 } 96 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 97 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 98 } 99 100 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 101 * to destroy everything that remains. */ 102 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 103 /* Note that I don't call MatSetType(). I believe this is because that 104 * is only to be called when *building* a matrix. I could be wrong, but 105 * that is how things work for the SuperLU matrix class. */ 106 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 111 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 112 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 113 * handle, creates a new one, and then calls mkl_sparse_optimize(). 114 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 115 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 116 * an unoptimized matrix handle here. */ 117 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 118 { 119 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 120 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 121 * does nothing. We make it callable anyway in this case because it cuts 122 * down on littering the code with #ifdefs. */ 123 PetscFunctionBegin; 124 PetscFunctionReturn(0); 125 #else 126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 127 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 128 PetscInt m,n; 129 MatScalar *aa; 130 PetscInt *aj,*ai; 131 sparse_status_t stat; 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 136 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 137 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 138 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 139 * mkl_sparse_optimize() later. */ 140 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 141 #endif 142 143 if (aijmkl->sparse_optimized) { 144 /* Matrix has been previously assembled and optimized. Must destroy old 145 * matrix handle before running the optimization step again. */ 146 stat = mkl_sparse_destroy(aijmkl->csrA); 147 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 148 } 149 aijmkl->sparse_optimized = PETSC_FALSE; 150 151 /* Now perform the SpMV2 setup and matrix optimization. */ 152 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 153 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 154 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 155 m = A->rmap->n; 156 n = A->cmap->n; 157 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 158 aa = a->a; /* Nonzero elements stored row-by-row. */ 159 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 160 if (a->nz && aa && !A->structure_only) { 161 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 162 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 163 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 164 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()"); 165 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 166 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 167 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 168 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 169 if (!aijmkl->no_SpMV2) { 170 stat = mkl_sparse_optimize(aijmkl->csrA); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()"); 172 } 173 aijmkl->sparse_optimized = PETSC_TRUE; 174 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 175 } else { 176 aijmkl->csrA = PETSC_NULL; 177 } 178 179 PetscFunctionReturn(0); 180 #endif 181 } 182 183 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 184 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 185 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 186 { 187 PetscErrorCode ierr; 188 sparse_status_t stat; 189 sparse_index_base_t indexing; 190 PetscInt m,n; 191 PetscInt *aj,*ai,*dummy; 192 MatScalar *aa; 193 Mat_SeqAIJMKL *aijmkl; 194 195 PetscFunctionBegin; 196 if (csrA) { 197 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 198 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 199 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 200 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 201 } else { 202 aj = ai = PETSC_NULL; 203 aa = PETSC_NULL; 204 } 205 206 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 207 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 208 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 209 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 210 * they will be destroyed when the MKL handle is destroyed. 211 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 212 if (csrA) { 213 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 214 } else { 215 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 216 ierr = MatSetUp(A);CHKERRQ(ierr); 217 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 218 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 219 } 220 221 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 222 * Now turn it into a MATSEQAIJMKL. */ 223 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 224 225 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 226 aijmkl->csrA = csrA; 227 228 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 229 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 230 * and just need to be able to run the MKL optimization step. */ 231 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 232 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 233 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 234 if (csrA) { 235 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 236 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 237 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 238 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 239 } 240 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 244 245 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 246 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 247 * MatMatMultNumeric(). */ 248 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 249 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 250 { 251 PetscInt i; 252 PetscInt nrows,ncols; 253 PetscInt nz; 254 PetscInt *ai,*aj,*dummy; 255 PetscScalar *aa; 256 PetscErrorCode ierr; 257 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 258 sparse_status_t stat; 259 sparse_index_base_t indexing; 260 261 PetscFunctionBegin; 262 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 263 if (!aijmkl->csrA) PetscFunctionReturn(0); 264 265 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 266 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 267 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 268 269 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 270 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 271 for (i=0; i<nrows; i++) { 272 nz = ai[i+1] - ai[i]; 273 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 274 } 275 276 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 277 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 278 279 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 280 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 281 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 282 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 283 aijmkl->sparse_optimized = PETSC_FALSE; 284 PetscFunctionReturn(0); 285 } 286 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 287 288 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 289 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 290 { 291 PetscInt i,j,k; 292 PetscInt nrows,ncols; 293 PetscInt nz; 294 PetscInt *ai,*aj,*dummy; 295 PetscScalar *aa; 296 PetscErrorCode ierr; 297 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 298 sparse_status_t stat; 299 sparse_index_base_t indexing; 300 301 PetscFunctionBegin; 302 ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 303 304 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 305 if (!aijmkl->csrA) { 306 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 311 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 312 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 313 314 k = 0; 315 for (i=0; i<nrows; i++) { 316 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 317 nz = ai[i+1] - ai[i]; 318 for (j=0; j<nz; j++) { 319 if (aa) { 320 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr); 321 } else { 322 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 323 } 324 k++; 325 } 326 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 327 } 328 PetscFunctionReturn(0); 329 } 330 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 331 332 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 333 { 334 PetscErrorCode ierr; 335 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 336 Mat_SeqAIJMKL *aijmkl_dest; 337 338 PetscFunctionBegin; 339 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 340 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 341 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 342 aijmkl_dest->sparse_optimized = PETSC_FALSE; 343 if (aijmkl->eager_inspection) { 344 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 345 } 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 350 { 351 PetscErrorCode ierr; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 353 Mat_SeqAIJMKL *aijmkl; 354 355 PetscFunctionBegin; 356 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 357 358 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 359 * extra information and some different methods, call the AssemblyEnd 360 * routine for a MATSEQAIJ. 361 * I'm not sure if this is the best way to do this, but it avoids 362 * a lot of code duplication. */ 363 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 364 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 365 366 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 367 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 368 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 369 if (aijmkl->eager_inspection) { 370 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 371 } 372 373 PetscFunctionReturn(0); 374 } 375 376 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 377 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 378 { 379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 380 const PetscScalar *x; 381 PetscScalar *y; 382 const MatScalar *aa; 383 PetscErrorCode ierr; 384 PetscInt m = A->rmap->n; 385 PetscInt n = A->cmap->n; 386 PetscScalar alpha = 1.0; 387 PetscScalar beta = 0.0; 388 const PetscInt *aj,*ai; 389 char matdescra[6]; 390 391 /* Variables not in MatMult_SeqAIJ. */ 392 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 393 394 PetscFunctionBegin; 395 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 396 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 397 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 398 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 399 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 400 aa = a->a; /* Nonzero elements stored row-by-row. */ 401 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 402 403 /* Call MKL sparse BLAS routine to do the MatMult. */ 404 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 405 406 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 408 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 #endif 412 413 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 414 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 415 { 416 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 417 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 418 const PetscScalar *x; 419 PetscScalar *y; 420 PetscErrorCode ierr; 421 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 422 PetscObjectState state; 423 424 PetscFunctionBegin; 425 426 /* If there are no nonzero entries, zero yy and return immediately. */ 427 if (!a->nz) { 428 PetscInt i; 429 PetscInt m=A->rmap->n; 430 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 431 for (i=0; i<m; i++) { 432 y[i] = 0.0; 433 } 434 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 439 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 440 441 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 442 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 443 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 444 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 445 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 446 MatSeqAIJMKL_create_mkl_handle(A); 447 } 448 449 /* Call MKL SpMV2 executor routine to do the MatMult. */ 450 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 451 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 452 453 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 454 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 455 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 459 460 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 461 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 462 { 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 464 const PetscScalar *x; 465 PetscScalar *y; 466 const MatScalar *aa; 467 PetscErrorCode ierr; 468 PetscInt m = A->rmap->n; 469 PetscInt n = A->cmap->n; 470 PetscScalar alpha = 1.0; 471 PetscScalar beta = 0.0; 472 const PetscInt *aj,*ai; 473 char matdescra[6]; 474 475 /* Variables not in MatMultTranspose_SeqAIJ. */ 476 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 477 478 PetscFunctionBegin; 479 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 480 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 481 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 482 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 483 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 484 aa = a->a; /* Nonzero elements stored row-by-row. */ 485 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 486 487 /* Call MKL sparse BLAS routine to do the MatMult. */ 488 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 489 490 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 492 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 #endif 496 497 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 498 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 499 { 500 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 501 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 502 const PetscScalar *x; 503 PetscScalar *y; 504 PetscErrorCode ierr; 505 sparse_status_t stat; 506 PetscObjectState state; 507 508 PetscFunctionBegin; 509 510 /* If there are no nonzero entries, zero yy and return immediately. */ 511 if (!a->nz) { 512 PetscInt i; 513 PetscInt n=A->cmap->n; 514 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 515 for (i=0; i<n; i++) { 516 y[i] = 0.0; 517 } 518 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 523 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 524 525 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 526 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 527 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 528 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 529 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 530 MatSeqAIJMKL_create_mkl_handle(A); 531 } 532 533 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 534 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 535 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 536 537 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 538 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 543 544 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 545 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 546 { 547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 548 const PetscScalar *x; 549 PetscScalar *y,*z; 550 const MatScalar *aa; 551 PetscErrorCode ierr; 552 PetscInt m = A->rmap->n; 553 PetscInt n = A->cmap->n; 554 const PetscInt *aj,*ai; 555 PetscInt i; 556 557 /* Variables not in MatMultAdd_SeqAIJ. */ 558 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 559 PetscScalar alpha = 1.0; 560 PetscScalar beta; 561 char matdescra[6]; 562 563 PetscFunctionBegin; 564 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 565 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 566 567 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 568 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 569 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 570 aa = a->a; /* Nonzero elements stored row-by-row. */ 571 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 572 573 /* Call MKL sparse BLAS routine to do the MatMult. */ 574 if (zz == yy) { 575 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 576 beta = 1.0; 577 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 578 } else { 579 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 580 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 581 beta = 0.0; 582 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 583 for (i=0; i<m; i++) { 584 z[i] += y[i]; 585 } 586 } 587 588 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 590 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 #endif 594 595 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 596 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 597 { 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 599 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 600 const PetscScalar *x; 601 PetscScalar *y,*z; 602 PetscErrorCode ierr; 603 PetscInt m = A->rmap->n; 604 PetscInt i; 605 606 /* Variables not in MatMultAdd_SeqAIJ. */ 607 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 608 PetscObjectState state; 609 610 PetscFunctionBegin; 611 612 /* If there are no nonzero entries, set zz = yy and return immediately. */ 613 if (!a->nz) { 614 PetscInt i; 615 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 616 for (i=0; i<m; i++) { 617 z[i] = y[i]; 618 } 619 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 624 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 625 626 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 627 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 628 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 629 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 630 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 631 MatSeqAIJMKL_create_mkl_handle(A); 632 } 633 634 /* Call MKL sparse BLAS routine to do the MatMult. */ 635 if (zz == yy) { 636 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 637 * with alpha and beta both set to 1.0. */ 638 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 639 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 640 } else { 641 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 642 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 643 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 644 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 645 for (i=0; i<m; i++) { 646 z[i] += y[i]; 647 } 648 } 649 650 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 651 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 652 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 656 657 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 658 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 661 const PetscScalar *x; 662 PetscScalar *y,*z; 663 const MatScalar *aa; 664 PetscErrorCode ierr; 665 PetscInt m = A->rmap->n; 666 PetscInt n = A->cmap->n; 667 const PetscInt *aj,*ai; 668 PetscInt i; 669 670 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 671 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 672 PetscScalar alpha = 1.0; 673 PetscScalar beta; 674 char matdescra[6]; 675 676 PetscFunctionBegin; 677 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 678 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 679 680 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 681 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 682 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 683 aa = a->a; /* Nonzero elements stored row-by-row. */ 684 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 685 686 /* Call MKL sparse BLAS routine to do the MatMult. */ 687 if (zz == yy) { 688 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 689 beta = 1.0; 690 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 691 } else { 692 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 693 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 694 beta = 0.0; 695 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 696 for (i=0; i<n; i++) { 697 z[i] += y[i]; 698 } 699 } 700 701 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 702 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 703 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 #endif 707 708 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 709 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 713 const PetscScalar *x; 714 PetscScalar *y,*z; 715 PetscErrorCode ierr; 716 PetscInt n = A->cmap->n; 717 PetscInt i; 718 PetscObjectState state; 719 720 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 721 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 722 723 PetscFunctionBegin; 724 725 /* If there are no nonzero entries, set zz = yy and return immediately. */ 726 if (!a->nz) { 727 PetscInt i; 728 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 729 for (i=0; i<n; i++) { 730 z[i] = y[i]; 731 } 732 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 736 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 737 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 738 739 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 740 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 741 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 742 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 743 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 744 MatSeqAIJMKL_create_mkl_handle(A); 745 } 746 747 /* Call MKL sparse BLAS routine to do the MatMult. */ 748 if (zz == yy) { 749 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 750 * with alpha and beta both set to 1.0. */ 751 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 752 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 753 } else { 754 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 755 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 756 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 757 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 758 for (i=0; i<n; i++) { 759 z[i] += y[i]; 760 } 761 } 762 763 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 764 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 765 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 766 PetscFunctionReturn(0); 767 } 768 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 769 770 /* -------------------------- MatProduct code -------------------------- */ 771 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 772 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 773 { 774 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 775 sparse_matrix_t csrA,csrB,csrC; 776 PetscInt nrows,ncols; 777 PetscErrorCode ierr; 778 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 779 struct matrix_descr descr_type_gen; 780 PetscObjectState state; 781 782 PetscFunctionBegin; 783 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 784 * not handle sparse matrices with zero rows or columns. */ 785 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 786 else nrows = A->cmap->N; 787 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 788 else ncols = B->rmap->N; 789 790 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 791 if (!a->sparse_optimized || a->state != state) { 792 MatSeqAIJMKL_create_mkl_handle(A); 793 } 794 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 795 if (!b->sparse_optimized || b->state != state) { 796 MatSeqAIJMKL_create_mkl_handle(B); 797 } 798 csrA = a->csrA; 799 csrB = b->csrA; 800 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 801 802 if (csrA && csrB) { 803 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 804 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 805 } else { 806 csrC = PETSC_NULL; 807 } 808 809 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 810 811 PetscFunctionReturn(0); 812 } 813 814 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 815 { 816 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 817 sparse_matrix_t csrA, csrB, csrC; 818 PetscErrorCode ierr; 819 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 820 struct matrix_descr descr_type_gen; 821 PetscObjectState state; 822 823 PetscFunctionBegin; 824 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 825 if (!a->sparse_optimized || a->state != state) { 826 MatSeqAIJMKL_create_mkl_handle(A); 827 } 828 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 829 if (!b->sparse_optimized || b->state != state) { 830 MatSeqAIJMKL_create_mkl_handle(B); 831 } 832 csrA = a->csrA; 833 csrB = b->csrA; 834 csrC = c->csrA; 835 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 836 837 if (csrA && csrB) { 838 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 839 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 840 } else { 841 csrC = PETSC_NULL; 842 } 843 844 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 845 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 846 847 PetscFunctionReturn(0); 848 } 849 850 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 851 { 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 856 PetscFunctionReturn(0); 857 } 858 859 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 860 { 861 PetscErrorCode ierr; 862 863 PetscFunctionBegin; 864 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 869 { 870 PetscErrorCode ierr; 871 872 PetscFunctionBegin; 873 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 878 { 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 887 { 888 PetscErrorCode ierr; 889 890 PetscFunctionBegin; 891 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 896 { 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 905 { 906 PetscErrorCode ierr; 907 Mat_Product *product = C->product; 908 Mat A = product->A,B = product->B; 909 910 PetscFunctionBegin; 911 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 916 { 917 PetscErrorCode ierr; 918 Mat_Product *product = C->product; 919 Mat A = product->A,B = product->B; 920 PetscReal fill = product->fill; 921 922 PetscFunctionBegin; 923 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 924 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 925 PetscFunctionReturn(0); 926 } 927 928 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 929 { 930 Mat Ct; 931 Vec zeros; 932 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 933 sparse_matrix_t csrA, csrP, csrC; 934 PetscBool set, flag; 935 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 936 struct matrix_descr descr_type_sym; 937 PetscObjectState state; 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 942 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 943 944 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 945 if (!a->sparse_optimized || a->state != state) { 946 MatSeqAIJMKL_create_mkl_handle(A); 947 } 948 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 949 if (!p->sparse_optimized || p->state != state) { 950 MatSeqAIJMKL_create_mkl_handle(P); 951 } 952 csrA = a->csrA; 953 csrP = p->csrA; 954 csrC = c->csrA; 955 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 956 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 957 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 958 959 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 960 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 961 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 962 963 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 964 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 965 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 966 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 967 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 968 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 969 * the full matrix. */ 970 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 971 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 972 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 973 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 974 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 975 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 976 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 977 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 978 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 979 ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 980 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 981 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 982 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 987 { 988 Mat_Product *product = C->product; 989 Mat A = product->A,P = product->B; 990 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 991 sparse_matrix_t csrA,csrP,csrC; 992 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 993 struct matrix_descr descr_type_sym; 994 PetscObjectState state; 995 PetscErrorCode ierr; 996 997 PetscFunctionBegin; 998 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 999 if (!a->sparse_optimized || a->state != state) { 1000 MatSeqAIJMKL_create_mkl_handle(A); 1001 } 1002 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1003 if (!p->sparse_optimized || p->state != state) { 1004 MatSeqAIJMKL_create_mkl_handle(P); 1005 } 1006 csrA = a->csrA; 1007 csrP = p->csrA; 1008 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1009 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1010 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1011 1012 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1013 if (csrP && csrA) { 1014 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1015 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1016 } else { 1017 csrC = PETSC_NULL; 1018 } 1019 1020 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1021 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1022 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 1023 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 1024 * is guaranteed. */ 1025 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1026 1027 C->ops->productnumeric = MatProductNumeric_PtAP; 1028 PetscFunctionReturn(0); 1029 } 1030 1031 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1032 { 1033 PetscFunctionBegin; 1034 C->ops->productsymbolic = MatProductSymbolic_AB; 1035 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1036 PetscFunctionReturn(0); 1037 } 1038 1039 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1040 { 1041 PetscFunctionBegin; 1042 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1047 { 1048 PetscFunctionBegin; 1049 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1050 C->ops->productsymbolic = MatProductSymbolic_ABt; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1055 { 1056 PetscErrorCode ierr; 1057 Mat_Product *product = C->product; 1058 Mat A = product->A; 1059 PetscBool set, flag; 1060 1061 PetscFunctionBegin; 1062 if (PetscDefined(USE_COMPLEX)) { 1063 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 1064 * We do this in several other locations in this file. This works for the time being, but these 1065 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1066 * TODO: Add proper MATSEQAIJMKL implementations */ 1067 C->ops->productsymbolic = NULL; 1068 } else { 1069 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1070 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1071 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1072 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1073 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1074 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1080 { 1081 PetscFunctionBegin; 1082 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1083 PetscFunctionReturn(0); 1084 } 1085 1086 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1087 { 1088 PetscFunctionBegin; 1089 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1090 PetscFunctionReturn(0); 1091 } 1092 1093 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1094 { 1095 PetscErrorCode ierr; 1096 Mat_Product *product = C->product; 1097 1098 PetscFunctionBegin; 1099 switch (product->type) { 1100 case MATPRODUCT_AB: 1101 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1102 break; 1103 case MATPRODUCT_AtB: 1104 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1105 break; 1106 case MATPRODUCT_ABt: 1107 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1108 break; 1109 case MATPRODUCT_PtAP: 1110 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1111 break; 1112 case MATPRODUCT_RARt: 1113 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1114 break; 1115 case MATPRODUCT_ABC: 1116 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1117 break; 1118 default: 1119 break; 1120 } 1121 PetscFunctionReturn(0); 1122 } 1123 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1124 /* ------------------------ End MatProduct code ------------------------ */ 1125 1126 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1127 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1128 * routine, but can also be used to convert an assembled SeqAIJ matrix 1129 * into a SeqAIJMKL one. */ 1130 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1131 { 1132 PetscErrorCode ierr; 1133 Mat B = *newmat; 1134 Mat_SeqAIJMKL *aijmkl; 1135 PetscBool set; 1136 PetscBool sametype; 1137 1138 PetscFunctionBegin; 1139 if (reuse == MAT_INITIAL_MATRIX) { 1140 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1141 } 1142 1143 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1144 if (sametype) PetscFunctionReturn(0); 1145 1146 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1147 B->spptr = (void*) aijmkl; 1148 1149 /* Set function pointers for methods that we inherit from AIJ but override. 1150 * We also parse some command line options below, since those determine some of the methods we point to. */ 1151 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1152 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1153 B->ops->destroy = MatDestroy_SeqAIJMKL; 1154 1155 aijmkl->sparse_optimized = PETSC_FALSE; 1156 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1157 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1158 #else 1159 aijmkl->no_SpMV2 = PETSC_TRUE; 1160 #endif 1161 aijmkl->eager_inspection = PETSC_FALSE; 1162 1163 /* Parse command line options. */ 1164 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1165 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1166 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1167 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1168 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1169 if (!aijmkl->no_SpMV2) { 1170 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");CHKERRQ(ierr); 1171 aijmkl->no_SpMV2 = PETSC_TRUE; 1172 } 1173 #endif 1174 1175 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1176 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1177 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1178 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1179 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1180 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1181 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1182 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1183 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1184 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1185 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1186 # if !defined(PETSC_USE_COMPLEX) 1187 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1188 # else 1189 B->ops->ptapnumeric = NULL; 1190 # endif 1191 # endif 1192 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1193 1194 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1195 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1196 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1197 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1198 * versions in which the older interface has not been deprecated, we use the old interface. */ 1199 if (aijmkl->no_SpMV2) { 1200 B->ops->mult = MatMult_SeqAIJMKL; 1201 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1202 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1203 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1204 } 1205 #endif 1206 1207 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1208 1209 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1210 *newmat = B; 1211 PetscFunctionReturn(0); 1212 } 1213 1214 /*@C 1215 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1216 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1217 routines from Intel MKL whenever possible. 1218 If the installed version of MKL supports the "SpMV2" sparse 1219 inspector-executor routines, then those are used by default. 1220 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1221 symmetric A) operations are currently supported. 1222 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1223 1224 Collective 1225 1226 Input Parameters: 1227 + comm - MPI communicator, set to PETSC_COMM_SELF 1228 . m - number of rows 1229 . n - number of columns 1230 . nz - number of nonzeros per row (same for all rows) 1231 - nnz - array containing the number of nonzeros in the various rows 1232 (possibly different for each row) or NULL 1233 1234 Output Parameter: 1235 . A - the matrix 1236 1237 Options Database Keys: 1238 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1239 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1240 1241 Notes: 1242 If nnz is given then nz is ignored 1243 1244 Level: intermediate 1245 1246 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1247 @*/ 1248 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1249 { 1250 PetscErrorCode ierr; 1251 1252 PetscFunctionBegin; 1253 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1254 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1255 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1256 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1261 { 1262 PetscErrorCode ierr; 1263 1264 PetscFunctionBegin; 1265 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1266 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269