1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. 
*/ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 61 * simply involves destroying the MKL sparse matrix handle and then freeing 62 * the spptr pointer. */ 63 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 64 65 if (aijmkl->sparse_optimized) { 66 sparse_status_t stat; 67 stat = mkl_sparse_destroy(aijmkl->csrA); 68 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 69 } 70 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 71 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 72 73 /* Change the type of B to MATSEQAIJ. */ 74 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 75 76 *newmat = B; 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 84 85 PetscFunctionBegin; 86 87 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. 
*/
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that I don't call MatSetType(). I believe this is because that
   * is only to be called when *building* a matrix. I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    /* Record the matrix state so later operations can detect a stale handle. */
    ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  PetscFunctionReturn(0);
#endif
}

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    /* NULL handle means an empty matrix (zero rows or columns); MKL cannot represent these. */
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle().
   I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(0);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Print the contents of the MKL sparse matrix handle to the given viewer (debugging aid). */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns).
*/ 302 if (!aijmkl->csrA) { 303 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 308 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 309 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 310 311 k = 0; 312 for (i=0; i<nrows; i++) { 313 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 314 nz = ai[i+1] - ai[i]; 315 for (j=0; j<nz; j++) { 316 if (aa) { 317 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr); 318 } else { 319 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 320 } 321 k++; 322 } 323 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 324 } 325 PetscFunctionReturn(0); 326 } 327 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 328 329 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 330 { 331 PetscErrorCode ierr; 332 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 333 Mat_SeqAIJMKL *aijmkl_dest; 334 335 PetscFunctionBegin; 336 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 337 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 338 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 339 aijmkl_dest->sparse_optimized = PETSC_FALSE; 340 if (aijmkl->eager_inspection) { 341 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 342 } 343 PetscFunctionReturn(0); 344 } 345 346 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 347 { 348 PetscErrorCode ierr; 349 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 350 Mat_SeqAIJMKL *aijmkl; 351 352 PetscFunctionBegin; 353 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 354 355 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 356 * extra information and some 
different methods, call the AssemblyEnd 357 * routine for a MATSEQAIJ. 358 * I'm not sure if this is the best way to do this, but it avoids 359 * a lot of code duplication. */ 360 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 361 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 362 363 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 364 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 365 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 366 if (aijmkl->eager_inspection) { 367 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 368 } 369 370 PetscFunctionReturn(0); 371 } 372 373 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 374 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 375 { 376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 377 const PetscScalar *x; 378 PetscScalar *y; 379 const MatScalar *aa; 380 PetscErrorCode ierr; 381 PetscInt m = A->rmap->n; 382 PetscInt n = A->cmap->n; 383 PetscScalar alpha = 1.0; 384 PetscScalar beta = 0.0; 385 const PetscInt *aj,*ai; 386 char matdescra[6]; 387 388 /* Variables not in MatMult_SeqAIJ. */ 389 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 390 391 PetscFunctionBegin; 392 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 393 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 394 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 395 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 396 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 397 aa = a->a; /* Nonzero elements stored row-by-row. */ 398 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 399 400 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 401 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 402 403 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 #endif 409 410 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 411 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 412 { 413 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 414 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 415 const PetscScalar *x; 416 PetscScalar *y; 417 PetscErrorCode ierr; 418 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 419 PetscObjectState state; 420 421 PetscFunctionBegin; 422 423 /* If there are no nonzero entries, zero yy and return immediately. */ 424 if (!a->nz) { 425 PetscInt i; 426 PetscInt m=A->rmap->n; 427 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 428 for (i=0; i<m; i++) { 429 y[i] = 0.0; 430 } 431 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 436 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 437 438 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 439 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 440 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 441 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 442 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 443 MatSeqAIJMKL_create_mkl_handle(A); 444 } 445 446 /* Call MKL SpMV2 executor routine to do the MatMult. 
*/ 447 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 448 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 449 450 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 451 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 452 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 456 457 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 458 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 459 { 460 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 461 const PetscScalar *x; 462 PetscScalar *y; 463 const MatScalar *aa; 464 PetscErrorCode ierr; 465 PetscInt m = A->rmap->n; 466 PetscInt n = A->cmap->n; 467 PetscScalar alpha = 1.0; 468 PetscScalar beta = 0.0; 469 const PetscInt *aj,*ai; 470 char matdescra[6]; 471 472 /* Variables not in MatMultTranspose_SeqAIJ. */ 473 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 474 475 PetscFunctionBegin; 476 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 477 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 478 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 479 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 480 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 481 aa = a->a; /* Nonzero elements stored row-by-row. */ 482 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 483 484 /* Call MKL sparse BLAS routine to do the MatMult. 
*/ 485 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 486 487 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 489 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 #endif 493 494 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 495 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 496 { 497 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 498 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 499 const PetscScalar *x; 500 PetscScalar *y; 501 PetscErrorCode ierr; 502 sparse_status_t stat; 503 PetscObjectState state; 504 505 PetscFunctionBegin; 506 507 /* If there are no nonzero entries, zero yy and return immediately. */ 508 if (!a->nz) { 509 PetscInt i; 510 PetscInt n=A->cmap->n; 511 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 512 for (i=0; i<n; i++) { 513 y[i] = 0.0; 514 } 515 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 520 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 521 522 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 523 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 524 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 525 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 526 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 527 MatSeqAIJMKL_create_mkl_handle(A); 528 } 529 530 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. 
*/ 531 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 532 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 533 534 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 535 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 536 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 537 PetscFunctionReturn(0); 538 } 539 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 540 541 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 542 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 543 { 544 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 545 const PetscScalar *x; 546 PetscScalar *y,*z; 547 const MatScalar *aa; 548 PetscErrorCode ierr; 549 PetscInt m = A->rmap->n; 550 PetscInt n = A->cmap->n; 551 const PetscInt *aj,*ai; 552 PetscInt i; 553 554 /* Variables not in MatMultAdd_SeqAIJ. */ 555 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 556 PetscScalar alpha = 1.0; 557 PetscScalar beta; 558 char matdescra[6]; 559 560 PetscFunctionBegin; 561 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 562 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 563 564 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 565 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 566 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 567 aa = a->a; /* Nonzero elements stored row-by-row. */ 568 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 569 570 /* Call MKL sparse BLAS routine to do the MatMult. */ 571 if (zz == yy) { 572 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 573 beta = 1.0; 574 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 575 } else { 576 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 
577 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 578 beta = 0.0; 579 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 580 for (i=0; i<m; i++) { 581 z[i] += y[i]; 582 } 583 } 584 585 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 586 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 587 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 #endif 591 592 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 593 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 594 { 595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 596 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 597 const PetscScalar *x; 598 PetscScalar *y,*z; 599 PetscErrorCode ierr; 600 PetscInt m = A->rmap->n; 601 PetscInt i; 602 603 /* Variables not in MatMultAdd_SeqAIJ. */ 604 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 605 PetscObjectState state; 606 607 PetscFunctionBegin; 608 609 /* If there are no nonzero entries, set zz = yy and return immediately. */ 610 if (!a->nz) { 611 PetscInt i; 612 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 613 for (i=0; i<m; i++) { 614 z[i] = y[i]; 615 } 616 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 621 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 622 623 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 624 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 625 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 626 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 627 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 628 MatSeqAIJMKL_create_mkl_handle(A); 629 } 630 631 /* Call MKL sparse BLAS routine to do the MatMult. 
*/
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* MatMultTransposeAdd for MATSEQAIJMKL using the old (non-inspector-executor) MKL
 * sparse BLAS: computes zz = A^T*xx + yy via mkl_xcsrmv() with transa = 't'. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultTransposeAdd for MATSEQAIJMKL using the MKL SpMV2 inspector-executor
 * interface: computes zz = A^T*xx + yy via mkl_sparse_x_mv(). */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately.
*/ 723 if (!a->nz) { 724 PetscInt i; 725 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 726 for (i=0; i<n; i++) { 727 z[i] = y[i]; 728 } 729 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 734 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 735 736 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 737 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 738 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 739 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 740 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 741 MatSeqAIJMKL_create_mkl_handle(A); 742 } 743 744 /* Call MKL sparse BLAS routine to do the MatMult. */ 745 if (zz == yy) { 746 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 747 * with alpha and beta both set to 1.0. */ 748 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 749 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 750 } else { 751 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 752 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. 
*/ 753 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 754 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 755 for (i=0; i<n; i++) { 756 z[i] += y[i]; 757 } 758 } 759 760 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 761 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 762 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 766 767 /* -------------------------- MatProduct code -------------------------- */ 768 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 769 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 770 { 771 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 772 sparse_matrix_t csrA,csrB,csrC; 773 PetscInt nrows,ncols; 774 PetscErrorCode ierr; 775 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 776 struct matrix_descr descr_type_gen; 777 PetscObjectState state; 778 779 PetscFunctionBegin; 780 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 781 * not handle sparse matrices with zero rows or columns. 
*/ 782 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 783 else nrows = A->cmap->N; 784 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 785 else ncols = B->rmap->N; 786 787 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 788 if (!a->sparse_optimized || a->state != state) { 789 MatSeqAIJMKL_create_mkl_handle(A); 790 } 791 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 792 if (!b->sparse_optimized || b->state != state) { 793 MatSeqAIJMKL_create_mkl_handle(B); 794 } 795 csrA = a->csrA; 796 csrB = b->csrA; 797 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 798 799 if (csrA && csrB) { 800 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 801 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 802 } else { 803 csrC = PETSC_NULL; 804 } 805 806 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 807 808 PetscFunctionReturn(0); 809 } 810 811 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 812 { 813 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 814 sparse_matrix_t csrA, csrB, csrC; 815 PetscErrorCode ierr; 816 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 817 struct matrix_descr descr_type_gen; 818 PetscObjectState state; 819 820 PetscFunctionBegin; 821 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 822 if (!a->sparse_optimized || a->state != state) { 823 MatSeqAIJMKL_create_mkl_handle(A); 824 } 825 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 826 if (!b->sparse_optimized || b->state != state) { 827 MatSeqAIJMKL_create_mkl_handle(B); 828 } 829 csrA = a->csrA; 830 csrB = 
b->csrA; 831 csrC = c->csrA; 832 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 833 834 if (csrA && csrB) { 835 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 836 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 837 } else { 838 csrC = PETSC_NULL; 839 } 840 841 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 842 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 843 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 848 { 849 PetscErrorCode ierr; 850 851 PetscFunctionBegin; 852 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 856 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 857 { 858 PetscErrorCode ierr; 859 860 PetscFunctionBegin; 861 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 866 { 867 PetscErrorCode ierr; 868 869 PetscFunctionBegin; 870 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 875 { 876 PetscErrorCode ierr; 877 878 PetscFunctionBegin; 879 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 PetscErrorCode 
MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 884 { 885 PetscErrorCode ierr; 886 887 PetscFunctionBegin; 888 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 893 { 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 902 { 903 PetscErrorCode ierr; 904 Mat_Product *product = C->product; 905 Mat A = product->A,B = product->B; 906 907 PetscFunctionBegin; 908 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 913 { 914 PetscErrorCode ierr; 915 Mat_Product *product = C->product; 916 Mat A = product->A,B = product->B; 917 PetscReal fill = product->fill; 918 919 PetscFunctionBegin; 920 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 921 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 922 PetscFunctionReturn(0); 923 } 924 925 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 926 { 927 Mat Ct; 928 Vec zeros; 929 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 930 sparse_matrix_t csrA, csrP, csrC; 931 PetscBool set, flag; 932 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 933 struct matrix_descr descr_type_sym; 934 PetscObjectState state; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 ierr = MatIsSymmetricKnown(A,&set,&flag); 939 if (!set || (set && !flag)) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 940 941 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 942 if (!a->sparse_optimized || a->state != state) { 943 MatSeqAIJMKL_create_mkl_handle(A); 944 } 945 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 946 if (!p->sparse_optimized || p->state != state) { 947 MatSeqAIJMKL_create_mkl_handle(P); 948 } 949 csrA = a->csrA; 950 csrP = p->csrA; 951 csrC = c->csrA; 952 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 953 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 954 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 955 956 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 957 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 958 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 959 960 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 961 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 962 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 963 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 964 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 965 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 966 * the full matrix. 
*/ 967 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 968 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 969 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 970 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 971 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 972 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 973 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 974 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 975 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 976 ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 977 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 978 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 979 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 984 { 985 Mat_Product *product = C->product; 986 Mat A = product->A,P = product->B; 987 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 988 sparse_matrix_t csrA,csrP,csrC; 989 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 990 struct matrix_descr descr_type_sym; 991 PetscObjectState state; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 996 if (!a->sparse_optimized || a->state != state) { 997 MatSeqAIJMKL_create_mkl_handle(A); 998 } 999 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1000 if (!p->sparse_optimized || p->state != state) { 1001 MatSeqAIJMKL_create_mkl_handle(P); 1002 } 1003 csrA = a->csrA; 1004 csrP = p->csrA; 1005 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1006 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1007 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1008 1009 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) 
*/ 1010 if (csrP && csrA) { 1011 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1012 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1013 } else { 1014 csrC = PETSC_NULL; 1015 } 1016 1017 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1018 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1019 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 1020 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 1021 * is guaranteed. */ 1022 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1023 1024 C->ops->productnumeric = MatProductNumeric_PtAP; 1025 PetscFunctionReturn(0); 1026 } 1027 1028 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1029 { 1030 PetscFunctionBegin; 1031 C->ops->productsymbolic = MatProductSymbolic_AB; 1032 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1033 PetscFunctionReturn(0); 1034 } 1035 1036 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1037 { 1038 PetscFunctionBegin; 1039 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1040 PetscFunctionReturn(0); 1041 } 1042 1043 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1044 { 1045 PetscFunctionBegin; 1046 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1047 C->ops->productsymbolic = MatProductSymbolic_ABt; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1052 { 1053 PetscErrorCode ierr; 1054 Mat_Product *product 
= C->product; 1055 Mat A = product->A; 1056 PetscBool set, flag; 1057 1058 PetscFunctionBegin; 1059 if (PetscDefined(USE_COMPLEX)) { 1060 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used. 1061 * We do this in several other locations in this file. This works for the time being, but these 1062 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1063 * TODO: Add proper MATSEQAIJMKL implementations */ 1064 C->ops->productsymbolic = NULL; 1065 } else { 1066 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1067 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1068 if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1069 else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1070 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1071 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1072 } 1073 PetscFunctionReturn(0); 1074 } 1075 1076 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1077 { 1078 PetscFunctionBegin; 1079 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 1080 PetscFunctionReturn(0); 1081 } 1082 1083 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1084 { 1085 PetscFunctionBegin; 1086 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. 
*/ 1087 PetscFunctionReturn(0); 1088 } 1089 1090 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1091 { 1092 PetscErrorCode ierr; 1093 Mat_Product *product = C->product; 1094 1095 PetscFunctionBegin; 1096 switch (product->type) { 1097 case MATPRODUCT_AB: 1098 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1099 break; 1100 case MATPRODUCT_AtB: 1101 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1102 break; 1103 case MATPRODUCT_ABt: 1104 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1105 break; 1106 case MATPRODUCT_PtAP: 1107 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1108 break; 1109 case MATPRODUCT_RARt: 1110 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1111 break; 1112 case MATPRODUCT_ABC: 1113 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1114 break; 1115 default: 1116 break; 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1121 /* ------------------------ End MatProduct code ------------------------ */ 1122 1123 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1124 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1125 * routine, but can also be used to convert an assembled SeqAIJ matrix 1126 * into a SeqAIJMKL one. 
*/ 1127 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1128 { 1129 PetscErrorCode ierr; 1130 Mat B = *newmat; 1131 Mat_SeqAIJMKL *aijmkl; 1132 PetscBool set; 1133 PetscBool sametype; 1134 1135 PetscFunctionBegin; 1136 if (reuse == MAT_INITIAL_MATRIX) { 1137 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1138 } 1139 1140 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1141 if (sametype) PetscFunctionReturn(0); 1142 1143 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1144 B->spptr = (void*) aijmkl; 1145 1146 /* Set function pointers for methods that we inherit from AIJ but override. 1147 * We also parse some command line options below, since those determine some of the methods we point to. */ 1148 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1149 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1150 B->ops->destroy = MatDestroy_SeqAIJMKL; 1151 1152 aijmkl->sparse_optimized = PETSC_FALSE; 1153 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1154 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1155 #else 1156 aijmkl->no_SpMV2 = PETSC_TRUE; 1157 #endif 1158 aijmkl->eager_inspection = PETSC_FALSE; 1159 1160 /* Parse command line options. 
*/ 1161 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1162 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1163 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1164 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1165 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1166 if (!aijmkl->no_SpMV2) { 1167 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1168 aijmkl->no_SpMV2 = PETSC_TRUE; 1169 } 1170 #endif 1171 1172 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1173 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1174 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1175 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1176 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1177 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1178 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1179 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1180 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1181 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1182 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1183 # if !defined(PETSC_USE_COMPLEX) 1184 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1185 # else 1186 B->ops->ptapnumeric = NULL; 1187 # endif 1188 # endif 1189 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1190 1191 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1192 /* In MKL version 18, update 2, the old sparse BLAS interfaces were 
marked as deprecated. If "no_SpMV2" has been specified by the 1193 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1194 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1195 * versions in which the older interface has not been deprecated, we use the old interface. */ 1196 if (aijmkl->no_SpMV2) { 1197 B->ops->mult = MatMult_SeqAIJMKL; 1198 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1199 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1200 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1201 } 1202 #endif 1203 1204 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1205 1206 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1207 *newmat = B; 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /*@C 1212 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1213 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1214 routines from Intel MKL whenever possible. 1215 If the installed version of MKL supports the "SpMV2" sparse 1216 inspector-executor routines, then those are used by default. 1217 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1218 symmetric A) operations are currently supported. 1219 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1220 1221 Collective 1222 1223 Input Parameters: 1224 + comm - MPI communicator, set to PETSC_COMM_SELF 1225 . m - number of rows 1226 . n - number of columns 1227 . nz - number of nonzeros per row (same for all rows) 1228 - nnz - array containing the number of nonzeros in the various rows 1229 (possibly different for each row) or NULL 1230 1231 Output Parameter: 1232 . 
A - the matrix 1233 1234 Options Database Keys: 1235 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1236 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1237 1238 Notes: 1239 If nnz is given then nz is ignored 1240 1241 Level: intermediate 1242 1243 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1244 @*/ 1245 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1246 { 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1251 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1252 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1253 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1258 { 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1263 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266