1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 if (!aijmkl->no_SpMV2) { 61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 64 } 65 66 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67 * simply involves destroying the MKL sparse matrix handle and then freeing 68 * the spptr pointer. */ 69 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 70 71 if (aijmkl->sparse_optimized) { 72 sparse_status_t stat; 73 stat = mkl_sparse_destroy(aijmkl->csrA); 74 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()"); 75 } 76 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 77 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 78 79 /* Change the type of B to MATSEQAIJ. */ 80 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 81 82 *newmat = B; 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 87 { 88 PetscErrorCode ierr; 89 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 90 91 PetscFunctionBegin; 92 93 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 94 if (aijmkl) { 95 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 96 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 97 if (aijmkl->sparse_optimized) { 98 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 99 stat = mkl_sparse_destroy(aijmkl->csrA); 100 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 101 } 102 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 103 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 104 } 105 106 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 107 * to destroy everything that remains. */ 108 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 109 /* Note that I don't call MatSetType(). I believe this is because that 110 * is only to be called when *building* a matrix. I could be wrong, but 111 * that is how things work for the SuperLU matrix class. */ 112 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 117 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 118 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 119 * handle, creates a new one, and then calls mkl_sparse_optimize(). 120 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 121 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 122 * an unoptimized matrix handle here. */ 123 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 124 { 125 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 126 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 127 * does nothing. We make it callable anyway in this case because it cuts 128 * down on littering the code with #ifdefs. */ 129 PetscFunctionBegin; 130 PetscFunctionReturn(0); 131 #else 132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 133 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 134 PetscInt m,n; 135 MatScalar *aa; 136 PetscInt *aj,*ai; 137 sparse_status_t stat; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 142 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 143 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 144 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 145 * mkl_sparse_optimize() later. */ 146 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 147 #endif 148 149 if (aijmkl->sparse_optimized) { 150 /* Matrix has been previously assembled and optimized. Must destroy old 151 * matrix handle before running the optimization step again. */ 152 stat = mkl_sparse_destroy(aijmkl->csrA); 153 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()"); 154 } 155 aijmkl->sparse_optimized = PETSC_FALSE; 156 157 /* Now perform the SpMV2 setup and matrix optimization. */ 158 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 159 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 160 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 161 m = A->rmap->n; 162 n = A->cmap->n; 163 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 164 aa = a->a; /* Nonzero elements stored row-by-row. */ 165 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 166 if (a->nz && aa && !A->structure_only) { 167 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 168 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 169 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 170 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()"); 171 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 172 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 173 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 174 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 175 if (!aijmkl->no_SpMV2) { 176 stat = mkl_sparse_optimize(aijmkl->csrA); 177 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()"); 178 } 179 aijmkl->sparse_optimized = PETSC_TRUE; 180 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 181 } else { 182 aijmkl->csrA = PETSC_NULL; 183 } 184 185 PetscFunctionReturn(0); 186 #endif 187 } 188 189 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 190 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 191 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 192 { 193 PetscErrorCode ierr; 194 sparse_status_t stat; 195 sparse_index_base_t indexing; 196 PetscInt m,n; 197 PetscInt *aj,*ai,*dummy; 198 MatScalar *aa; 199 Mat_SeqAIJMKL *aijmkl; 200 201 if (csrA) { 202 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 203 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 204 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 205 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 206 } else { 207 aj = ai = PETSC_NULL; 208 aa = PETSC_NULL; 209 } 210 211 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 212 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 213 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 214 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 215 * they will be destroyed when the MKL handle is destroyed. 216 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 217 if (csrA) { 218 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 219 } else { 220 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 221 ierr = MatSetUp(A);CHKERRQ(ierr); 222 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 223 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 } 225 226 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 227 * Now turn it into a MATSEQAIJMKL. */ 228 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 229 230 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 231 aijmkl->csrA = csrA; 232 233 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 234 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 235 * and just need to be able to run the MKL optimization step. */ 236 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 237 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 238 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 239 if (csrA) { 240 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 241 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()"); 242 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 243 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()"); 244 } 245 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 249 250 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 251 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 252 * MatMatMultNumeric(). */ 253 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 254 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 255 { 256 PetscInt i; 257 PetscInt nrows,ncols; 258 PetscInt nz; 259 PetscInt *ai,*aj,*dummy; 260 PetscScalar *aa; 261 PetscErrorCode ierr; 262 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 263 sparse_status_t stat; 264 sparse_index_base_t indexing; 265 266 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 267 if (!aijmkl->csrA) PetscFunctionReturn(0); 268 269 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 270 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 271 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 272 273 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 274 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 275 for (i=0; i<nrows; i++) { 276 nz = ai[i+1] - ai[i]; 277 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 278 } 279 280 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 281 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 282 283 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 284 /* We mark our matrix as having a valid, optimized MKL handle. 285 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 286 aijmkl->sparse_optimized = PETSC_TRUE; 287 PetscFunctionReturn(0); 288 } 289 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 290 291 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 292 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 293 { 294 PetscInt i,j,k; 295 PetscInt nrows,ncols; 296 PetscInt nz; 297 PetscInt *ai,*aj,*dummy; 298 PetscScalar *aa; 299 PetscErrorCode ierr; 300 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 301 sparse_status_t stat; 302 sparse_index_base_t indexing; 303 304 ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 305 306 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 307 if (!aijmkl->csrA) { 308 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 313 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 314 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 315 316 k = 0; 317 for (i=0; i<nrows; i++) { 318 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 319 nz = ai[i+1] - ai[i]; 320 for (j=0; j<nz; j++) { 321 if (aa) { 322 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],aa[k]);CHKERRQ(ierr); 323 } else { 324 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 325 } 326 k++; 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 PetscFunctionReturn(0); 331 } 332 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 333 334 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 335 { 336 PetscErrorCode ierr; 337 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 338 Mat_SeqAIJMKL *aijmkl_dest; 339 340 PetscFunctionBegin; 341 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 342 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 343 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 344 aijmkl_dest->sparse_optimized = PETSC_FALSE; 345 if (aijmkl->eager_inspection) { 346 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 347 } 348 PetscFunctionReturn(0); 349 } 350 351 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 352 { 353 PetscErrorCode ierr; 354 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 355 Mat_SeqAIJMKL *aijmkl; 356 357 PetscFunctionBegin; 358 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 359 360 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 361 * extra information and some different methods, call the AssemblyEnd 362 * routine for a MATSEQAIJ. 363 * I'm not sure if this is the best way to do this, but it avoids 364 * a lot of code duplication. */ 365 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 366 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 367 368 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 369 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 370 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 371 if (aijmkl->eager_inspection) { 372 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 373 } 374 375 PetscFunctionReturn(0); 376 } 377 378 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 379 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 380 { 381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 382 const PetscScalar *x; 383 PetscScalar *y; 384 const MatScalar *aa; 385 PetscErrorCode ierr; 386 PetscInt m = A->rmap->n; 387 PetscInt n = A->cmap->n; 388 PetscScalar alpha = 1.0; 389 PetscScalar beta = 0.0; 390 const PetscInt *aj,*ai; 391 char matdescra[6]; 392 393 394 /* Variables not in MatMult_SeqAIJ. */ 395 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 396 397 PetscFunctionBegin; 398 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 399 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 400 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 401 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 402 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 403 aa = a->a; /* Nonzero elements stored row-by-row. */ 404 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 405 406 /* Call MKL sparse BLAS routine to do the MatMult. */ 407 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 408 409 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 411 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 #endif 415 416 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 417 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 418 { 419 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 420 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 421 const PetscScalar *x; 422 PetscScalar *y; 423 PetscErrorCode ierr; 424 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 425 PetscObjectState state; 426 427 PetscFunctionBegin; 428 429 /* If there are no nonzero entries, zero yy and return immediately. */ 430 if(!a->nz) { 431 PetscInt i; 432 PetscInt m=A->rmap->n; 433 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 434 for (i=0; i<m; i++) { 435 y[i] = 0.0; 436 } 437 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 442 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 443 444 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 445 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 446 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 447 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 448 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 449 MatSeqAIJMKL_create_mkl_handle(A); 450 } 451 452 /* Call MKL SpMV2 executor routine to do the MatMult. */ 453 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 454 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 455 456 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 457 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 458 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 462 463 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 464 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 465 { 466 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 467 const PetscScalar *x; 468 PetscScalar *y; 469 const MatScalar *aa; 470 PetscErrorCode ierr; 471 PetscInt m = A->rmap->n; 472 PetscInt n = A->cmap->n; 473 PetscScalar alpha = 1.0; 474 PetscScalar beta = 0.0; 475 const PetscInt *aj,*ai; 476 char matdescra[6]; 477 478 /* Variables not in MatMultTranspose_SeqAIJ. */ 479 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 480 481 PetscFunctionBegin; 482 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 483 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 484 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 485 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 486 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 487 aa = a->a; /* Nonzero elements stored row-by-row. */ 488 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 489 490 /* Call MKL sparse BLAS routine to do the MatMult. */ 491 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 492 493 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 495 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 #endif 499 500 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 501 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 502 { 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 504 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 505 const PetscScalar *x; 506 PetscScalar *y; 507 PetscErrorCode ierr; 508 sparse_status_t stat; 509 PetscObjectState state; 510 511 PetscFunctionBegin; 512 513 /* If there are no nonzero entries, zero yy and return immediately. */ 514 if(!a->nz) { 515 PetscInt i; 516 PetscInt n=A->cmap->n; 517 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 518 for (i=0; i<n; i++) { 519 y[i] = 0.0; 520 } 521 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 526 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527 528 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 529 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 530 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 531 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 532 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 533 MatSeqAIJMKL_create_mkl_handle(A); 534 } 535 536 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 537 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 538 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 539 540 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 541 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 542 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 546 547 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 548 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 549 { 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 551 const PetscScalar *x; 552 PetscScalar *y,*z; 553 const MatScalar *aa; 554 PetscErrorCode ierr; 555 PetscInt m = A->rmap->n; 556 PetscInt n = A->cmap->n; 557 const PetscInt *aj,*ai; 558 PetscInt i; 559 560 /* Variables not in MatMultAdd_SeqAIJ. */ 561 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 562 PetscScalar alpha = 1.0; 563 PetscScalar beta; 564 char matdescra[6]; 565 566 PetscFunctionBegin; 567 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 568 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 569 570 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 571 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 572 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 573 aa = a->a; /* Nonzero elements stored row-by-row. */ 574 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 575 576 /* Call MKL sparse BLAS routine to do the MatMult. */ 577 if (zz == yy) { 578 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 579 beta = 1.0; 580 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 581 } else { 582 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 583 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 584 beta = 0.0; 585 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 586 for (i=0; i<m; i++) { 587 z[i] += y[i]; 588 } 589 } 590 591 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 593 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 #endif 597 598 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 599 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 602 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 603 const PetscScalar *x; 604 PetscScalar *y,*z; 605 PetscErrorCode ierr; 606 PetscInt m = A->rmap->n; 607 PetscInt i; 608 609 /* Variables not in MatMultAdd_SeqAIJ. */ 610 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 611 PetscObjectState state; 612 613 PetscFunctionBegin; 614 615 /* If there are no nonzero entries, set zz = yy and return immediately. */ 616 if(!a->nz) { 617 PetscInt i; 618 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 619 for (i=0; i<m; i++) { 620 z[i] = y[i]; 621 } 622 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 627 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 628 629 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 630 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 631 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 632 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 633 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 634 MatSeqAIJMKL_create_mkl_handle(A); 635 } 636 637 /* Call MKL sparse BLAS routine to do the MatMult. */ 638 if (zz == yy) { 639 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 640 * with alpha and beta both set to 1.0. */ 641 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 642 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 643 } else { 644 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 645 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 646 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 647 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 648 for (i=0; i<m; i++) { 649 z[i] += y[i]; 650 } 651 } 652 653 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 654 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 655 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 656 PetscFunctionReturn(0); 657 } 658 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 659 660 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 662 { 663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 664 const PetscScalar *x; 665 PetscScalar *y,*z; 666 const MatScalar *aa; 667 PetscErrorCode ierr; 668 PetscInt m = A->rmap->n; 669 PetscInt n = A->cmap->n; 670 const PetscInt *aj,*ai; 671 PetscInt i; 672 673 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 674 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 675 PetscScalar alpha = 1.0; 676 PetscScalar beta; 677 char matdescra[6]; 678 679 PetscFunctionBegin; 680 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 681 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 682 683 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 684 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 685 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 686 aa = a->a; /* Nonzero elements stored row-by-row. */ 687 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 688 689 /* Call MKL sparse BLAS routine to do the MatMult. */ 690 if (zz == yy) { 691 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 692 beta = 1.0; 693 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 694 } else { 695 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 696 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 697 beta = 0.0; 698 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 699 for (i=0; i<n; i++) { 700 z[i] += y[i]; 701 } 702 } 703 704 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 705 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 706 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 #endif 710 711 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 712 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 713 { 714 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 715 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 716 const PetscScalar *x; 717 PetscScalar *y,*z; 718 PetscErrorCode ierr; 719 PetscInt n = A->cmap->n; 720 PetscInt i; 721 PetscObjectState state; 722 723 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 724 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 725 726 PetscFunctionBegin; 727 728 /* If there are no nonzero entries, set zz = yy and return immediately. */ 729 if(!a->nz) { 730 PetscInt i; 731 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 732 for (i=0; i<n; i++) { 733 z[i] = y[i]; 734 } 735 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 740 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 741 742 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 743 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 744 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 745 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 746 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 747 MatSeqAIJMKL_create_mkl_handle(A); 748 } 749 750 /* Call MKL sparse BLAS routine to do the MatMult. */ 751 if (zz == yy) { 752 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 753 * with alpha and beta both set to 1.0. */ 754 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 755 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 756 } else { 757 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 758 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 759 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 760 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()"); 761 for (i=0; i<n; i++) { 762 z[i] += y[i]; 763 } 764 } 765 766 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 767 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 768 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 772 773 /* -------------------------- MatProduct code -------------------------- */ 774 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 775 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 776 { 777 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 778 sparse_matrix_t csrA,csrB,csrC; 779 PetscInt nrows,ncols; 780 PetscErrorCode ierr; 781 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 782 struct matrix_descr descr_type_gen; 783 PetscObjectState state; 784 785 PetscFunctionBegin; 786 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 787 * not handle sparse matrices with zero rows or columns. */ 788 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 789 else nrows = A->cmap->N; 790 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 791 else ncols = B->rmap->N; 792 793 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 794 if (!a->sparse_optimized || a->state != state) { 795 MatSeqAIJMKL_create_mkl_handle(A); 796 } 797 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 798 if (!b->sparse_optimized || b->state != state) { 799 MatSeqAIJMKL_create_mkl_handle(B); 800 } 801 csrA = a->csrA; 802 csrB = b->csrA; 803 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 804 805 if (csrA && csrB) { 806 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 807 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 808 } else { 809 csrC = PETSC_NULL; 810 } 811 812 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 813 814 PetscFunctionReturn(0); 815 } 816 817 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 818 { 819 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 820 sparse_matrix_t csrA, csrB, csrC; 821 PetscErrorCode ierr; 822 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 823 struct matrix_descr descr_type_gen; 824 PetscObjectState state; 825 826 PetscFunctionBegin; 827 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 828 if (!a->sparse_optimized || a->state != state) { 829 MatSeqAIJMKL_create_mkl_handle(A); 830 } 831 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 832 if (!b->sparse_optimized || b->state != state) { 833 MatSeqAIJMKL_create_mkl_handle(B); 834 } 835 csrA = a->csrA; 836 csrB = b->csrA; 837 csrC = c->csrA; 838 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 839 840 if (csrA && csrB) { 841 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 842 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()"); 843 } else { 844 csrC = PETSC_NULL; 845 } 846 847 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 848 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 849 850 PetscFunctionReturn(0); 851 } 852 853 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 854 { 855 PetscErrorCode ierr; 856 857 PetscFunctionBegin; 858 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 863 { 864 PetscErrorCode ierr; 865 866 PetscFunctionBegin; 867 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 872 { 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } 879 880 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 881 { 882 PetscErrorCode ierr; 883 884 PetscFunctionBegin; 885 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 890 { 891 PetscErrorCode ierr; 892 893 PetscFunctionBegin; 894 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 899 { 900 PetscErrorCode ierr; 901 902 PetscFunctionBegin; 903 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 908 { 909 PetscErrorCode ierr; 910 Mat_Product *product = C->product; 911 Mat A = product->A,B = product->B; 912 913 PetscFunctionBegin; 914 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 919 { 920 PetscErrorCode ierr; 921 Mat_Product *product = C->product; 922 Mat A = product->A,B = product->B; 923 PetscReal fill = product->fill; 924 925 PetscFunctionBegin; 926 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 927 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 928 PetscFunctionReturn(0); 929 } 930 931 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 932 { 933 Mat Ct; 934 Vec zeros; 935 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 936 sparse_matrix_t csrA, csrP, csrC; 937 PetscBool set, flag; 938 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 939 struct matrix_descr descr_type_sym; 940 PetscObjectState state; 941 PetscErrorCode ierr; 942 943 PetscFunctionBegin; 944 ierr = MatIsSymmetricKnown(A,&set,&flag); 945 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 946 947 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 948 if (!a->sparse_optimized || a->state != state) { 949 MatSeqAIJMKL_create_mkl_handle(A); 950 } 951 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 952 if (!p->sparse_optimized || p->state != state) { 953 MatSeqAIJMKL_create_mkl_handle(P); 954 } 955 csrA = a->csrA; 956 csrP = p->csrA; 957 csrC = c->csrA; 958 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 959 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 960 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 961 962 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 963 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 964 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()"); 965 966 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 967 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 968 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 969 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 970 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 971 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 972 * the full matrix. */ 973 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 974 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 975 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 976 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 977 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 978 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 979 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 980 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 981 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 982 ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 983 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 984 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 985 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 990 { 991 Mat_Product *product = C->product; 992 Mat A = product->A,P = product->B; 993 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 994 sparse_matrix_t csrA,csrP,csrC; 995 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 996 struct matrix_descr descr_type_sym; 997 PetscObjectState state; 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 1002 if (!a->sparse_optimized || a->state != state) { 1003 MatSeqAIJMKL_create_mkl_handle(A); 1004 } 1005 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1006 if (!p->sparse_optimized || p->state != state) { 1007 MatSeqAIJMKL_create_mkl_handle(P); 1008 } 1009 csrA = a->csrA; 1010 csrP = p->csrA; 1011 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1012 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1013 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1014 1015 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1016 if (csrP && csrA) { 1017 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1018 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()"); 1019 } else { 1020 csrC = PETSC_NULL; 1021 } 1022 1023 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1024 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1025 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 1026 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 1027 * is guaranteed. */ 1028 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1029 1030 C->ops->productnumeric = MatProductNumeric_PtAP; 1031 PetscFunctionReturn(0); 1032 } 1033 1034 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1035 { 1036 PetscFunctionBegin; 1037 C->ops->productsymbolic = MatProductSymbolic_AB; 1038 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1039 PetscFunctionReturn(0); 1040 } 1041 1042 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1043 { 1044 PetscFunctionBegin; 1045 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1046 PetscFunctionReturn(0); 1047 } 1048 1049 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1050 { 1051 PetscFunctionBegin; 1052 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1053 C->ops->productsymbolic = MatProductSymbolic_ABt; 1054 PetscFunctionReturn(0); 1055 } 1056 1057 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1058 { 1059 PetscErrorCode ierr; 1060 Mat_Product *product = C->product; 1061 Mat A = product->A; 1062 PetscBool set, flag; 1063 1064 PetscFunctionBegin; 1065 #if defined(PETSC_USE_COMPLEX) 1066 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used. 1067 * We do this in several other locations in this file. This works for the time being, but the _Basic() 1068 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1069 * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */ 1070 C->ops->productsymbolic = NULL; 1071 #else 1072 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1073 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1074 if (set && flag) { 1075 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1076 PetscFunctionReturn(0); 1077 } else { 1078 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1079 } 1080 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1081 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1082 #endif 1083 PetscFunctionReturn(0); 1084 } 1085 1086 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1087 { 1088 PetscFunctionBegin; 1089 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1090 PetscFunctionReturn(0); 1091 } 1092 1093 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1094 { 1095 PetscFunctionBegin; 1096 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1097 PetscFunctionReturn(0); 1098 } 1099 1100 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1101 { 1102 PetscErrorCode ierr; 1103 Mat_Product *product = C->product; 1104 1105 PetscFunctionBegin; 1106 switch (product->type) { 1107 case MATPRODUCT_AB: 1108 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1109 break; 1110 case MATPRODUCT_AtB: 1111 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1112 break; 1113 case MATPRODUCT_ABt: 1114 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1115 break; 1116 case MATPRODUCT_PtAP: 1117 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1118 break; 1119 case MATPRODUCT_RARt: 1120 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1121 break; 1122 case MATPRODUCT_ABC: 1123 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1124 break; 1125 default: 1126 break; 1127 } 1128 PetscFunctionReturn(0); 1129 } 1130 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1131 /* ------------------------ End MatProduct code ------------------------ */ 1132 1133 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1134 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1135 * routine, but can also be used to convert an assembled SeqAIJ matrix 1136 * into a SeqAIJMKL one. */ 1137 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1138 { 1139 PetscErrorCode ierr; 1140 Mat B = *newmat; 1141 Mat_SeqAIJMKL *aijmkl; 1142 PetscBool set; 1143 PetscBool sametype; 1144 1145 PetscFunctionBegin; 1146 if (reuse == MAT_INITIAL_MATRIX) { 1147 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1148 } 1149 1150 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1151 if (sametype) PetscFunctionReturn(0); 1152 1153 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1154 B->spptr = (void*) aijmkl; 1155 1156 /* Set function pointers for methods that we inherit from AIJ but override. 1157 * We also parse some command line options below, since those determine some of the methods we point to. */ 1158 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1159 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1160 B->ops->destroy = MatDestroy_SeqAIJMKL; 1161 1162 aijmkl->sparse_optimized = PETSC_FALSE; 1163 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1164 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1165 #else 1166 aijmkl->no_SpMV2 = PETSC_TRUE; 1167 #endif 1168 aijmkl->eager_inspection = PETSC_FALSE; 1169 1170 /* Parse command line options. */ 1171 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1172 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1173 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1174 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1175 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1176 if(!aijmkl->no_SpMV2) { 1177 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1178 aijmkl->no_SpMV2 = PETSC_TRUE; 1179 } 1180 #endif 1181 1182 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1183 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1184 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1185 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1186 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1187 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1188 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1189 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1190 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1191 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1192 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1193 # if !defined(PETSC_USE_COMPLEX) 1194 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1195 # else 1196 B->ops->ptapnumeric = NULL; 1197 # endif 1198 # endif 1199 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1200 1201 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1202 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1203 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1204 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1205 * versions in which the older interface has not been deprecated, we use the old interface. */ 1206 if (aijmkl->no_SpMV2) { 1207 B->ops->mult = MatMult_SeqAIJMKL; 1208 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1209 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1210 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1211 } 1212 #endif 1213 1214 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1215 1216 if(!aijmkl->no_SpMV2) { 1217 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1218 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1219 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr); 1220 #endif 1221 #endif 1222 } 1223 1224 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1225 *newmat = B; 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /*@C 1230 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1231 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1232 routines from Intel MKL whenever possible. 1233 If the installed version of MKL supports the "SpMV2" sparse 1234 inspector-executor routines, then those are used by default. 1235 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1236 symmetric A) operations are currently supported. 1237 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1238 1239 Collective 1240 1241 Input Parameters: 1242 + comm - MPI communicator, set to PETSC_COMM_SELF 1243 . m - number of rows 1244 . n - number of columns 1245 . nz - number of nonzeros per row (same for all rows) 1246 - nnz - array containing the number of nonzeros in the various rows 1247 (possibly different for each row) or NULL 1248 1249 Output Parameter: 1250 . A - the matrix 1251 1252 Options Database Keys: 1253 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1254 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1255 1256 Notes: 1257 If nnz is given then nz is ignored 1258 1259 Level: intermediate 1260 1261 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1262 @*/ 1263 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1264 { 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1269 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1270 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1271 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1276 { 1277 PetscErrorCode ierr; 1278 1279 PetscFunctionBegin; 1280 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1281 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284