1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 if (!aijmkl->no_SpMV2) { 61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 64 } 65 66 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67 * simply involves destroying the MKL sparse matrix handle and then freeing 68 * the spptr pointer. */ 69 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 70 71 if (aijmkl->sparse_optimized) { 72 sparse_status_t stat; 73 stat = mkl_sparse_destroy(aijmkl->csrA); 74 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 75 } 76 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 77 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 78 79 /* Change the type of B to MATSEQAIJ. */ 80 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 81 82 *newmat = B; 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 87 { 88 PetscErrorCode ierr; 89 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 90 91 PetscFunctionBegin; 92 93 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 94 if (aijmkl) { 95 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 96 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 97 if (aijmkl->sparse_optimized) { 98 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 99 stat = mkl_sparse_destroy(aijmkl->csrA); 100 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 101 } 102 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 103 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 104 } 105 106 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 107 * to destroy everything that remains. */ 108 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 109 /* Note that I don't call MatSetType(). I believe this is because that 110 * is only to be called when *building* a matrix. I could be wrong, but 111 * that is how things work for the SuperLU matrix class. */ 112 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 117 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 118 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 119 * handle, creates a new one, and then calls mkl_sparse_optimize(). 120 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 121 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 122 * an unoptimized matrix handle here. */ 123 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 124 { 125 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 126 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 127 * does nothing. We make it callable anyway in this case because it cuts 128 * down on littering the code with #ifdefs. */ 129 PetscFunctionBegin; 130 PetscFunctionReturn(0); 131 #else 132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 133 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 134 PetscInt m,n; 135 MatScalar *aa; 136 PetscInt *aj,*ai; 137 sparse_status_t stat; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 142 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 143 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 144 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 145 * mkl_sparse_optimize() later. */ 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 #endif 148 149 if (aijmkl->sparse_optimized) { 150 /* Matrix has been previously assembled and optimized. Must destroy old 151 * matrix handle before running the optimization step again. */ 152 stat = mkl_sparse_destroy(aijmkl->csrA); 153 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 154 } 155 aijmkl->sparse_optimized = PETSC_FALSE; 156 157 /* Now perform the SpMV2 setup and matrix optimization. */ 158 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 159 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 160 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 161 m = A->rmap->n; 162 n = A->cmap->n; 163 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 164 aa = a->a; /* Nonzero elements stored row-by-row. */ 165 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 166 if (a->nz && aa && !A->structure_only) { 167 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 168 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 169 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 170 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()"); 171 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 172 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 173 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 174 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 175 if (!aijmkl->no_SpMV2) { 176 stat = mkl_sparse_optimize(aijmkl->csrA); 177 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()"); 178 } 179 aijmkl->sparse_optimized = PETSC_TRUE; 180 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 181 } else { 182 aijmkl->csrA = PETSC_NULL; 183 } 184 185 PetscFunctionReturn(0); 186 #endif 187 } 188 189 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 190 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 191 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 192 { 193 PetscErrorCode ierr; 194 sparse_status_t stat; 195 sparse_index_base_t indexing; 196 PetscInt m,n; 197 PetscInt *aj,*ai,*dummy; 198 MatScalar *aa; 199 Mat_SeqAIJMKL *aijmkl; 200 201 if (csrA) { 202 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 203 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 204 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 205 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 206 } else { 207 aj = ai = PETSC_NULL; 208 aa = PETSC_NULL; 209 } 210 211 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 212 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 213 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 214 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 215 * they will be destroyed when the MKL handle is destroyed. 216 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 217 if (csrA) { 218 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 219 } else { 220 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 221 ierr = MatSetUp(A);CHKERRQ(ierr); 222 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 223 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 } 225 226 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 227 * Now turn it into a MATSEQAIJMKL. */ 228 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 229 230 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 231 aijmkl->csrA = csrA; 232 233 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 234 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 235 * and just need to be able to run the MKL optimization step. */ 236 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 237 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 238 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 239 if (csrA) { 240 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 241 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 242 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 243 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 244 } 245 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 249 250 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 251 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 252 * MatMatMultNumeric(). */ 253 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 254 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 255 { 256 PetscInt i; 257 PetscInt nrows,ncols; 258 PetscInt nz; 259 PetscInt *ai,*aj,*dummy; 260 PetscScalar *aa; 261 PetscErrorCode ierr; 262 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 263 sparse_status_t stat; 264 sparse_index_base_t indexing; 265 266 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 267 if (!aijmkl->csrA) PetscFunctionReturn(0); 268 269 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 270 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 271 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 272 273 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 274 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 275 for (i=0; i<nrows; i++) { 276 nz = ai[i+1] - ai[i]; 277 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 278 } 279 280 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 281 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 282 283 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 284 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 285 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 286 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 287 aijmkl->sparse_optimized = PETSC_FALSE; 288 PetscFunctionReturn(0); 289 } 290 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 291 292 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 293 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 294 { 295 PetscInt i,j,k; 296 PetscInt nrows,ncols; 297 PetscInt nz; 298 PetscInt *ai,*aj,*dummy; 299 PetscScalar *aa; 300 PetscErrorCode ierr; 301 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 302 sparse_status_t stat; 303 sparse_index_base_t indexing; 304 305 ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 306 307 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 308 if (!aijmkl->csrA) { 309 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 314 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 315 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 316 317 k = 0; 318 for (i=0; i<nrows; i++) { 319 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 320 nz = ai[i+1] - ai[i]; 321 for (j=0; j<nz; j++) { 322 if (aa) { 323 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr); 324 } else { 325 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 326 } 327 k++; 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 PetscFunctionReturn(0); 332 } 333 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 334 335 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 336 { 337 PetscErrorCode ierr; 338 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 339 Mat_SeqAIJMKL *aijmkl_dest; 340 341 PetscFunctionBegin; 342 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 343 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 344 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 345 aijmkl_dest->sparse_optimized = PETSC_FALSE; 346 if (aijmkl->eager_inspection) { 347 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 353 { 354 PetscErrorCode ierr; 355 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 356 Mat_SeqAIJMKL *aijmkl; 357 358 PetscFunctionBegin; 359 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 360 361 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 362 * extra information and some different methods, call the AssemblyEnd 363 * routine for a MATSEQAIJ. 364 * I'm not sure if this is the best way to do this, but it avoids 365 * a lot of code duplication. */ 366 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 367 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 368 369 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 370 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 371 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 372 if (aijmkl->eager_inspection) { 373 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 374 } 375 376 PetscFunctionReturn(0); 377 } 378 379 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 380 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 381 { 382 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 383 const PetscScalar *x; 384 PetscScalar *y; 385 const MatScalar *aa; 386 PetscErrorCode ierr; 387 PetscInt m = A->rmap->n; 388 PetscInt n = A->cmap->n; 389 PetscScalar alpha = 1.0; 390 PetscScalar beta = 0.0; 391 const PetscInt *aj,*ai; 392 char matdescra[6]; 393 394 395 /* Variables not in MatMult_SeqAIJ. */ 396 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 397 398 PetscFunctionBegin; 399 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 400 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 401 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 402 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 403 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 404 aa = a->a; /* Nonzero elements stored row-by-row. */ 405 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 406 407 /* Call MKL sparse BLAS routine to do the MatMult. */ 408 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 409 410 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 412 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 #endif 416 417 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 418 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 419 { 420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 421 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 422 const PetscScalar *x; 423 PetscScalar *y; 424 PetscErrorCode ierr; 425 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 426 PetscObjectState state; 427 428 PetscFunctionBegin; 429 430 /* If there are no nonzero entries, zero yy and return immediately. */ 431 if (!a->nz) { 432 PetscInt i; 433 PetscInt m=A->rmap->n; 434 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 435 for (i=0; i<m; i++) { 436 y[i] = 0.0; 437 } 438 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 443 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 444 445 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 446 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 447 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 448 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 449 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 450 MatSeqAIJMKL_create_mkl_handle(A); 451 } 452 453 /* Call MKL SpMV2 executor routine to do the MatMult. */ 454 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 455 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 456 457 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 458 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 463 464 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 465 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 466 { 467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 468 const PetscScalar *x; 469 PetscScalar *y; 470 const MatScalar *aa; 471 PetscErrorCode ierr; 472 PetscInt m = A->rmap->n; 473 PetscInt n = A->cmap->n; 474 PetscScalar alpha = 1.0; 475 PetscScalar beta = 0.0; 476 const PetscInt *aj,*ai; 477 char matdescra[6]; 478 479 /* Variables not in MatMultTranspose_SeqAIJ. */ 480 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 481 482 PetscFunctionBegin; 483 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 484 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 485 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 486 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 487 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 488 aa = a->a; /* Nonzero elements stored row-by-row. */ 489 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 490 491 /* Call MKL sparse BLAS routine to do the MatMult. */ 492 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 493 494 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 495 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 496 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 #endif 500 501 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 502 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 503 { 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 506 const PetscScalar *x; 507 PetscScalar *y; 508 PetscErrorCode ierr; 509 sparse_status_t stat; 510 PetscObjectState state; 511 512 PetscFunctionBegin; 513 514 /* If there are no nonzero entries, zero yy and return immediately. */ 515 if (!a->nz) { 516 PetscInt i; 517 PetscInt n=A->cmap->n; 518 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 519 for (i=0; i<n; i++) { 520 y[i] = 0.0; 521 } 522 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 527 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 528 529 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 530 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 531 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 532 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 533 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 534 MatSeqAIJMKL_create_mkl_handle(A); 535 } 536 537 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 538 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 539 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 540 541 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 542 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 547 548 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 549 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 550 { 551 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 552 const PetscScalar *x; 553 PetscScalar *y,*z; 554 const MatScalar *aa; 555 PetscErrorCode ierr; 556 PetscInt m = A->rmap->n; 557 PetscInt n = A->cmap->n; 558 const PetscInt *aj,*ai; 559 PetscInt i; 560 561 /* Variables not in MatMultAdd_SeqAIJ. */ 562 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 563 PetscScalar alpha = 1.0; 564 PetscScalar beta; 565 char matdescra[6]; 566 567 PetscFunctionBegin; 568 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 569 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 570 571 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 572 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 573 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 574 aa = a->a; /* Nonzero elements stored row-by-row. */ 575 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 576 577 /* Call MKL sparse BLAS routine to do the MatMult. */ 578 if (zz == yy) { 579 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 580 beta = 1.0; 581 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 582 } else { 583 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 584 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 585 beta = 0.0; 586 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 587 for (i=0; i<m; i++) { 588 z[i] += y[i]; 589 } 590 } 591 592 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 594 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 #endif 598 599 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 600 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 601 { 602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 603 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 604 const PetscScalar *x; 605 PetscScalar *y,*z; 606 PetscErrorCode ierr; 607 PetscInt m = A->rmap->n; 608 PetscInt i; 609 610 /* Variables not in MatMultAdd_SeqAIJ. */ 611 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 612 PetscObjectState state; 613 614 PetscFunctionBegin; 615 616 /* If there are no nonzero entries, set zz = yy and return immediately. */ 617 if (!a->nz) { 618 PetscInt i; 619 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 620 for (i=0; i<m; i++) { 621 z[i] = y[i]; 622 } 623 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 628 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 629 630 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 631 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 632 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 633 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 634 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 635 MatSeqAIJMKL_create_mkl_handle(A); 636 } 637 638 /* Call MKL sparse BLAS routine to do the MatMult. */ 639 if (zz == yy) { 640 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 641 * with alpha and beta both set to 1.0. */ 642 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 643 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 644 } else { 645 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 646 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 647 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 648 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 649 for (i=0; i<m; i++) { 650 z[i] += y[i]; 651 } 652 } 653 654 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 655 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 656 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 660 661 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 662 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 663 { 664 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 665 const PetscScalar *x; 666 PetscScalar *y,*z; 667 const MatScalar *aa; 668 PetscErrorCode ierr; 669 PetscInt m = A->rmap->n; 670 PetscInt n = A->cmap->n; 671 const PetscInt *aj,*ai; 672 PetscInt i; 673 674 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 675 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 676 PetscScalar alpha = 1.0; 677 PetscScalar beta; 678 char matdescra[6]; 679 680 PetscFunctionBegin; 681 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 682 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 683 684 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 685 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 686 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 687 aa = a->a; /* Nonzero elements stored row-by-row. */ 688 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 689 690 /* Call MKL sparse BLAS routine to do the MatMult. */ 691 if (zz == yy) { 692 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 693 beta = 1.0; 694 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 695 } else { 696 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 697 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 698 beta = 0.0; 699 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 700 for (i=0; i<n; i++) { 701 z[i] += y[i]; 702 } 703 } 704 705 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 706 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 707 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 #endif 711 712 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 713 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 714 { 715 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 716 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 717 const PetscScalar *x; 718 PetscScalar *y,*z; 719 PetscErrorCode ierr; 720 PetscInt n = A->cmap->n; 721 PetscInt i; 722 PetscObjectState state; 723 724 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 725 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 726 727 PetscFunctionBegin; 728 729 /* If there are no nonzero entries, set zz = yy and return immediately. */ 730 if (!a->nz) { 731 PetscInt i; 732 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 733 for (i=0; i<n; i++) { 734 z[i] = y[i]; 735 } 736 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 741 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 742 743 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 744 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 745 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 746 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 747 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 748 MatSeqAIJMKL_create_mkl_handle(A); 749 } 750 751 /* Call MKL sparse BLAS routine to do the MatMult. */ 752 if (zz == yy) { 753 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 754 * with alpha and beta both set to 1.0. */ 755 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 756 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 757 } else { 758 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 759 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 760 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 761 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 762 for (i=0; i<n; i++) { 763 z[i] += y[i]; 764 } 765 } 766 767 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 768 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 769 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } 772 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 773 774 /* -------------------------- MatProduct code -------------------------- */ 775 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 776 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 777 { 778 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 779 sparse_matrix_t csrA,csrB,csrC; 780 PetscInt nrows,ncols; 781 PetscErrorCode ierr; 782 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 783 struct matrix_descr descr_type_gen; 784 PetscObjectState state; 785 786 PetscFunctionBegin; 787 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 788 * not handle sparse matrices with zero rows or columns. */ 789 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 790 else nrows = A->cmap->N; 791 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 792 else ncols = B->rmap->N; 793 794 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 795 if (!a->sparse_optimized || a->state != state) { 796 MatSeqAIJMKL_create_mkl_handle(A); 797 } 798 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 799 if (!b->sparse_optimized || b->state != state) { 800 MatSeqAIJMKL_create_mkl_handle(B); 801 } 802 csrA = a->csrA; 803 csrB = b->csrA; 804 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 805 806 if (csrA && csrB) { 807 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 808 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 809 } else { 810 csrC = PETSC_NULL; 811 } 812 813 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 814 815 PetscFunctionReturn(0); 816 } 817 818 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 819 { 820 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 821 sparse_matrix_t csrA, csrB, csrC; 822 PetscErrorCode ierr; 823 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 824 struct matrix_descr descr_type_gen; 825 PetscObjectState state; 826 827 PetscFunctionBegin; 828 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 829 if (!a->sparse_optimized || a->state != state) { 830 MatSeqAIJMKL_create_mkl_handle(A); 831 } 832 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 833 if (!b->sparse_optimized || b->state != state) { 834 MatSeqAIJMKL_create_mkl_handle(B); 835 } 836 csrA = a->csrA; 837 csrB = b->csrA; 838 csrC = c->csrA; 839 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 840 841 if (csrA && csrB) { 842 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 843 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 844 } else { 845 csrC = PETSC_NULL; 846 } 847 848 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 849 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 850 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 855 { 856 PetscErrorCode ierr; 857 858 PetscFunctionBegin; 859 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 864 { 865 PetscErrorCode ierr; 866 867 PetscFunctionBegin; 868 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 873 { 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 878 PetscFunctionReturn(0); 879 } 880 881 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 882 { 883 PetscErrorCode ierr; 884 885 PetscFunctionBegin; 886 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 891 { 892 PetscErrorCode ierr; 893 894 PetscFunctionBegin; 895 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 900 { 901 PetscErrorCode ierr; 902 903 PetscFunctionBegin; 904 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } 907 908 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 909 { 910 PetscErrorCode ierr; 911 Mat_Product *product = C->product; 912 Mat A = product->A,B = product->B; 913 914 PetscFunctionBegin; 915 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 920 { 921 PetscErrorCode ierr; 922 Mat_Product *product = C->product; 923 Mat A = product->A,B = product->B; 924 PetscReal fill = product->fill; 925 926 PetscFunctionBegin; 927 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 928 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 929 PetscFunctionReturn(0); 930 } 931 932 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 933 { 934 Mat Ct; 935 Vec zeros; 936 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 937 sparse_matrix_t csrA, csrP, csrC; 938 PetscBool set, flag; 939 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 940 struct matrix_descr descr_type_sym; 941 PetscObjectState state; 942 PetscErrorCode ierr; 943 944 PetscFunctionBegin; 945 ierr = MatIsSymmetricKnown(A,&set,&flag); 946 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 947 948 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 949 if (!a->sparse_optimized || a->state != state) { 950 MatSeqAIJMKL_create_mkl_handle(A); 951 } 952 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 953 if (!p->sparse_optimized || p->state != state) { 954 MatSeqAIJMKL_create_mkl_handle(P); 955 } 956 csrA = a->csrA; 957 csrP = p->csrA; 958 csrC = c->csrA; 959 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 960 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 961 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 962 963 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 964 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 965 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 966 967 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 968 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 969 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 970 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 971 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 972 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 973 * the full matrix. */ 974 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 975 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 976 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 977 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 978 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 979 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 980 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 981 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 982 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 983 ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 984 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 985 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 986 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 991 { 992 Mat_Product *product = C->product; 993 Mat A = product->A,P = product->B; 994 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 995 sparse_matrix_t csrA,csrP,csrC; 996 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 997 struct matrix_descr descr_type_sym; 998 PetscObjectState state; 999 PetscErrorCode ierr; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 1003 if (!a->sparse_optimized || a->state != state) { 1004 MatSeqAIJMKL_create_mkl_handle(A); 1005 } 1006 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1007 if (!p->sparse_optimized || p->state != state) { 1008 MatSeqAIJMKL_create_mkl_handle(P); 1009 } 1010 csrA = a->csrA; 1011 csrP = p->csrA; 1012 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1013 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1014 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1015 1016 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1017 if (csrP && csrA) { 1018 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1019 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1020 } else { 1021 csrC = PETSC_NULL; 1022 } 1023 1024 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1025 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1026 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 1027 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 1028 * is guaranteed. */ 1029 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1030 1031 C->ops->productnumeric = MatProductNumeric_PtAP; 1032 PetscFunctionReturn(0); 1033 } 1034 1035 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1036 { 1037 PetscFunctionBegin; 1038 C->ops->productsymbolic = MatProductSymbolic_AB; 1039 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1040 PetscFunctionReturn(0); 1041 } 1042 1043 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1044 { 1045 PetscFunctionBegin; 1046 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1047 PetscFunctionReturn(0); 1048 } 1049 1050 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1051 { 1052 PetscFunctionBegin; 1053 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1054 C->ops->productsymbolic = MatProductSymbolic_ABt; 1055 PetscFunctionReturn(0); 1056 } 1057 1058 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1059 { 1060 PetscErrorCode ierr; 1061 Mat_Product *product = C->product; 1062 Mat A = product->A; 1063 PetscBool set, flag; 1064 1065 PetscFunctionBegin; 1066 #if defined(PETSC_USE_COMPLEX) 1067 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used. 1068 * We do this in several other locations in this file. This works for the time being, but the _Basic() 1069 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1070 * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */ 1071 C->ops->productsymbolic = NULL; 1072 #else 1073 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1074 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1075 if (set && flag) { 1076 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1077 PetscFunctionReturn(0); 1078 } else { 1079 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1080 } 1081 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1082 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1083 #endif 1084 PetscFunctionReturn(0); 1085 } 1086 1087 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1088 { 1089 PetscFunctionBegin; 1090 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1091 PetscFunctionReturn(0); 1092 } 1093 1094 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1095 { 1096 PetscFunctionBegin; 1097 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1098 PetscFunctionReturn(0); 1099 } 1100 1101 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1102 { 1103 PetscErrorCode ierr; 1104 Mat_Product *product = C->product; 1105 1106 PetscFunctionBegin; 1107 switch (product->type) { 1108 case MATPRODUCT_AB: 1109 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1110 break; 1111 case MATPRODUCT_AtB: 1112 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1113 break; 1114 case MATPRODUCT_ABt: 1115 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1116 break; 1117 case MATPRODUCT_PtAP: 1118 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1119 break; 1120 case MATPRODUCT_RARt: 1121 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1122 break; 1123 case MATPRODUCT_ABC: 1124 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1125 break; 1126 default: 1127 break; 1128 } 1129 PetscFunctionReturn(0); 1130 } 1131 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1132 /* ------------------------ End MatProduct code ------------------------ */ 1133 1134 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1135 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1136 * routine, but can also be used to convert an assembled SeqAIJ matrix 1137 * into a SeqAIJMKL one. */ 1138 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1139 { 1140 PetscErrorCode ierr; 1141 Mat B = *newmat; 1142 Mat_SeqAIJMKL *aijmkl; 1143 PetscBool set; 1144 PetscBool sametype; 1145 1146 PetscFunctionBegin; 1147 if (reuse == MAT_INITIAL_MATRIX) { 1148 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1149 } 1150 1151 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1152 if (sametype) PetscFunctionReturn(0); 1153 1154 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1155 B->spptr = (void*) aijmkl; 1156 1157 /* Set function pointers for methods that we inherit from AIJ but override. 1158 * We also parse some command line options below, since those determine some of the methods we point to. */ 1159 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1160 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1161 B->ops->destroy = MatDestroy_SeqAIJMKL; 1162 1163 aijmkl->sparse_optimized = PETSC_FALSE; 1164 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1165 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1166 #else 1167 aijmkl->no_SpMV2 = PETSC_TRUE; 1168 #endif 1169 aijmkl->eager_inspection = PETSC_FALSE; 1170 1171 /* Parse command line options. */ 1172 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1173 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1174 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1175 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1176 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1177 if (!aijmkl->no_SpMV2) { 1178 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1179 aijmkl->no_SpMV2 = PETSC_TRUE; 1180 } 1181 #endif 1182 1183 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1184 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1185 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1186 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1187 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1188 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1189 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1190 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1191 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1192 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1193 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1194 # if !defined(PETSC_USE_COMPLEX) 1195 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1196 # else 1197 B->ops->ptapnumeric = NULL; 1198 # endif 1199 # endif 1200 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1201 1202 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1203 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1204 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1205 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1206 * versions in which the older interface has not been deprecated, we use the old interface. */ 1207 if (aijmkl->no_SpMV2) { 1208 B->ops->mult = MatMult_SeqAIJMKL; 1209 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1210 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1211 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1212 } 1213 #endif 1214 1215 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1216 1217 if (!aijmkl->no_SpMV2) { 1218 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1219 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1220 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr); 1221 #endif 1222 #endif 1223 } 1224 1225 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1226 *newmat = B; 1227 PetscFunctionReturn(0); 1228 } 1229 1230 /*@C 1231 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1232 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1233 routines from Intel MKL whenever possible. 1234 If the installed version of MKL supports the "SpMV2" sparse 1235 inspector-executor routines, then those are used by default. 1236 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1237 symmetric A) operations are currently supported. 1238 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1239 1240 Collective 1241 1242 Input Parameters: 1243 + comm - MPI communicator, set to PETSC_COMM_SELF 1244 . m - number of rows 1245 . n - number of columns 1246 . nz - number of nonzeros per row (same for all rows) 1247 - nnz - array containing the number of nonzeros in the various rows 1248 (possibly different for each row) or NULL 1249 1250 Output Parameter: 1251 . A - the matrix 1252 1253 Options Database Keys: 1254 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1255 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1256 1257 Notes: 1258 If nnz is given then nz is ignored 1259 1260 Level: intermediate 1261 1262 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1263 @*/ 1264 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1265 { 1266 PetscErrorCode ierr; 1267 1268 PetscFunctionBegin; 1269 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1270 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1271 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1272 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1277 { 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1282 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285