1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 if (!aijmkl->no_SpMV2) { 61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif 64 } 65 66 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67 * simply involves destroying the MKL sparse matrix handle and then freeing 68 * the spptr pointer. */ 69 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 70 71 if (aijmkl->sparse_optimized) { 72 sparse_status_t stat; 73 stat = mkl_sparse_destroy(aijmkl->csrA); 74 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 75 } 76 #endif 77 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 78 79 /* Change the type of B to MATSEQAIJ. */ 80 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 81 82 *newmat = B; 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 87 { 88 PetscErrorCode ierr; 89 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 90 91 PetscFunctionBegin; 92 93 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 94 * spptr pointer. */ 95 if (aijmkl) { 96 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 97 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 98 if (aijmkl->sparse_optimized) { 99 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 100 stat = mkl_sparse_destroy(aijmkl->csrA); 101 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 102 } 103 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 104 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 105 } 106 107 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 108 * to destroy everything that remains. */ 109 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 110 /* Note that I don't call MatSetType(). I believe this is because that 111 * is only to be called when *building* a matrix. I could be wrong, but 112 * that is how things work for the SuperLU matrix class. */ 113 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 118 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 119 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 120 * handle, creates a new one, and then calls mkl_sparse_optimize(). 121 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 122 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 123 * an unoptimized matrix handle here. */ 124 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 125 { 126 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 127 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 128 * does nothing. We make it callable anyway in this case because it cuts 129 * down on littering the code with #ifdefs. */ 130 PetscFunctionBegin; 131 PetscFunctionReturn(0); 132 #else 133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 134 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 135 PetscInt m,n; 136 MatScalar *aa; 137 PetscInt *aj,*ai; 138 sparse_status_t stat; 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 143 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 144 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 145 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 146 * mkl_sparse_optimize() later. */ 147 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 148 #endif 149 150 if (aijmkl->sparse_optimized) { 151 /* Matrix has been previously assembled and optimized. Must destroy old 152 * matrix handle before running the optimization step again. */ 153 stat = mkl_sparse_destroy(aijmkl->csrA); 154 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 155 } 156 aijmkl->sparse_optimized = PETSC_FALSE; 157 158 /* Now perform the SpMV2 setup and matrix optimization. */ 159 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 162 m = A->rmap->n; 163 n = A->cmap->n; 164 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165 aa = a->a; /* Nonzero elements stored row-by-row. */ 166 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 167 if (a->nz && aa && !A->structure_only) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 172 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 174 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 176 if (!aijmkl->no_SpMV2) { 177 stat = mkl_sparse_optimize(aijmkl->csrA); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 179 } 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 182 } else { 183 aijmkl->csrA = PETSC_NULL; 184 } 185 186 PetscFunctionReturn(0); 187 #endif 188 } 189 190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 191 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 192 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 193 { 194 PetscErrorCode ierr; 195 sparse_status_t stat; 196 sparse_index_base_t indexing; 197 PetscInt m,n; 198 PetscInt *aj,*ai,*dummy; 199 MatScalar *aa; 200 Mat_SeqAIJMKL *aijmkl; 201 202 if (csrA) { 203 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 204 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 205 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 206 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 207 } else { 208 aj = ai = PETSC_NULL; 209 aa = PETSC_NULL; 210 } 211 212 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 213 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 214 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 215 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 216 * they will be destroyed when the MKL handle is destroyed. 217 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 218 if (csrA) { 219 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 220 } else { 221 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 222 ierr = MatSetUp(A);CHKERRQ(ierr); 223 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 } 226 227 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 228 * Now turn it into a MATSEQAIJMKL. */ 229 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 230 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 aijmkl->csrA = csrA; 233 234 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 235 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 236 * and just need to be able to run the MKL optimization step. */ 237 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 238 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 239 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 240 if (csrA) { 241 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 242 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 243 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 244 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 245 } 246 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 250 251 252 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 253 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 254 * MatMatMultNumeric(). */ 255 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 256 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 257 { 258 PetscInt i; 259 PetscInt nrows,ncols; 260 PetscInt nz; 261 PetscInt *ai,*aj,*dummy; 262 PetscScalar *aa; 263 PetscErrorCode ierr; 264 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 265 sparse_status_t stat; 266 sparse_index_base_t indexing; 267 268 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 269 if (!aijmkl->csrA) PetscFunctionReturn(0); 270 271 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 272 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 273 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 274 275 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 276 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 277 for (i=0; i<nrows; i++) { 278 nz = ai[i+1] - ai[i]; 279 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 280 } 281 282 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 283 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 284 285 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 286 /* We mark our matrix as having a valid, optimized MKL handle. 287 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 288 aijmkl->sparse_optimized = PETSC_TRUE; 289 PetscFunctionReturn(0); 290 } 291 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 292 293 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 294 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 295 { 296 PetscInt i,j,k; 297 PetscInt nrows,ncols; 298 PetscInt nz; 299 PetscInt *ai,*aj,*dummy; 300 PetscScalar *aa; 301 PetscErrorCode ierr; 302 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 303 sparse_status_t stat; 304 sparse_index_base_t indexing; 305 306 ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 307 308 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 309 if (!aijmkl->csrA) { 310 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 315 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 316 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 317 318 k = 0; 319 for (i=0; i<nrows; i++) { 320 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 321 nz = ai[i+1] - ai[i]; 322 for (j=0; j<nz; j++) { 323 if (aa) { 324 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],aa[k]);CHKERRQ(ierr); 325 } else { 326 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 327 } 328 k++; 329 } 330 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 335 336 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 337 { 338 PetscErrorCode ierr; 339 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 340 Mat_SeqAIJMKL *aijmkl_dest; 341 342 PetscFunctionBegin; 343 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 344 aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr; 345 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 346 aijmkl_dest->sparse_optimized = PETSC_FALSE; 347 if (aijmkl->eager_inspection) { 348 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 349 } 350 PetscFunctionReturn(0); 351 } 352 353 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 354 { 355 PetscErrorCode ierr; 356 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 357 Mat_SeqAIJMKL *aijmkl; 358 359 PetscFunctionBegin; 360 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 361 362 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 363 * extra information and some different methods, call the AssemblyEnd 364 * routine for a MATSEQAIJ. 365 * I'm not sure if this is the best way to do this, but it avoids 366 * a lot of code duplication. */ 367 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 368 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 369 370 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 371 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 372 aijmkl = (Mat_SeqAIJMKL*)A->spptr; 373 if (aijmkl->eager_inspection) { 374 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 375 } 376 377 PetscFunctionReturn(0); 378 } 379 380 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 381 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 382 { 383 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 384 const PetscScalar *x; 385 PetscScalar *y; 386 const MatScalar *aa; 387 PetscErrorCode ierr; 388 PetscInt m = A->rmap->n; 389 PetscInt n = A->cmap->n; 390 PetscScalar alpha = 1.0; 391 PetscScalar beta = 0.0; 392 const PetscInt *aj,*ai; 393 char matdescra[6]; 394 395 396 /* Variables not in MatMult_SeqAIJ. */ 397 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 398 399 PetscFunctionBegin; 400 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 401 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 402 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 403 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 404 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 405 aa = a->a; /* Nonzero elements stored row-by-row. */ 406 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 407 408 /* Call MKL sparse BLAS routine to do the MatMult. */ 409 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 410 411 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 413 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 #endif 417 418 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 419 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 420 { 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 423 const PetscScalar *x; 424 PetscScalar *y; 425 PetscErrorCode ierr; 426 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 427 PetscObjectState state; 428 429 PetscFunctionBegin; 430 431 /* If there are no nonzero entries, zero yy and return immediately. */ 432 if(!a->nz) { 433 PetscInt i; 434 PetscInt m=A->rmap->n; 435 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 436 for (i=0; i<m; i++) { 437 y[i] = 0.0; 438 } 439 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 444 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 445 446 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 447 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 448 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 449 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 450 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 451 MatSeqAIJMKL_create_mkl_handle(A); 452 } 453 454 /* Call MKL SpMV2 executor routine to do the MatMult. */ 455 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 456 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 457 458 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 459 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 460 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 464 465 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 466 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 467 { 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 469 const PetscScalar *x; 470 PetscScalar *y; 471 const MatScalar *aa; 472 PetscErrorCode ierr; 473 PetscInt m = A->rmap->n; 474 PetscInt n = A->cmap->n; 475 PetscScalar alpha = 1.0; 476 PetscScalar beta = 0.0; 477 const PetscInt *aj,*ai; 478 char matdescra[6]; 479 480 /* Variables not in MatMultTranspose_SeqAIJ. */ 481 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 482 483 PetscFunctionBegin; 484 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 485 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 486 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 488 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 489 aa = a->a; /* Nonzero elements stored row-by-row. */ 490 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 491 492 /* Call MKL sparse BLAS routine to do the MatMult. */ 493 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 494 495 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 497 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 #endif 501 502 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 503 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 504 { 505 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 506 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 507 const PetscScalar *x; 508 PetscScalar *y; 509 PetscErrorCode ierr; 510 sparse_status_t stat; 511 PetscObjectState state; 512 513 PetscFunctionBegin; 514 515 /* If there are no nonzero entries, zero yy and return immediately. */ 516 if(!a->nz) { 517 PetscInt i; 518 PetscInt n=A->cmap->n; 519 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 520 for (i=0; i<n; i++) { 521 y[i] = 0.0; 522 } 523 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 524 PetscFunctionReturn(0); 525 } 526 527 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 528 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 529 530 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 531 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 532 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 533 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 534 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 535 MatSeqAIJMKL_create_mkl_handle(A); 536 } 537 538 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 539 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 540 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 541 542 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 543 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 544 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 548 549 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 550 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 551 { 552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 553 const PetscScalar *x; 554 PetscScalar *y,*z; 555 const MatScalar *aa; 556 PetscErrorCode ierr; 557 PetscInt m = A->rmap->n; 558 PetscInt n = A->cmap->n; 559 const PetscInt *aj,*ai; 560 PetscInt i; 561 562 /* Variables not in MatMultAdd_SeqAIJ. */ 563 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 564 PetscScalar alpha = 1.0; 565 PetscScalar beta; 566 char matdescra[6]; 567 568 PetscFunctionBegin; 569 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 570 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 571 572 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 574 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 575 aa = a->a; /* Nonzero elements stored row-by-row. */ 576 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 577 578 /* Call MKL sparse BLAS routine to do the MatMult. */ 579 if (zz == yy) { 580 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 581 beta = 1.0; 582 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 583 } else { 584 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 585 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 586 beta = 0.0; 587 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 588 for (i=0; i<m; i++) { 589 z[i] += y[i]; 590 } 591 } 592 593 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 594 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 595 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 #endif 599 600 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 601 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 602 { 603 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 604 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 605 const PetscScalar *x; 606 PetscScalar *y,*z; 607 PetscErrorCode ierr; 608 PetscInt m = A->rmap->n; 609 PetscInt i; 610 611 /* Variables not in MatMultAdd_SeqAIJ. */ 612 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 613 PetscObjectState state; 614 615 PetscFunctionBegin; 616 617 /* If there are no nonzero entries, set zz = yy and return immediately. */ 618 if(!a->nz) { 619 PetscInt i; 620 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 621 for (i=0; i<m; i++) { 622 z[i] = y[i]; 623 } 624 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } 627 628 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 629 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 630 631 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 632 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 633 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 634 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 635 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 636 MatSeqAIJMKL_create_mkl_handle(A); 637 } 638 639 /* Call MKL sparse BLAS routine to do the MatMult. */ 640 if (zz == yy) { 641 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 642 * with alpha and beta both set to 1.0. */ 643 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 644 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 645 } else { 646 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 647 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 648 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 649 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 650 for (i=0; i<m; i++) { 651 z[i] += y[i]; 652 } 653 } 654 655 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 656 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 657 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 661 662 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 663 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 664 { 665 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 666 const PetscScalar *x; 667 PetscScalar *y,*z; 668 const MatScalar *aa; 669 PetscErrorCode ierr; 670 PetscInt m = A->rmap->n; 671 PetscInt n = A->cmap->n; 672 const PetscInt *aj,*ai; 673 PetscInt i; 674 675 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 676 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 677 PetscScalar alpha = 1.0; 678 PetscScalar beta; 679 char matdescra[6]; 680 681 PetscFunctionBegin; 682 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 683 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 684 685 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 686 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 687 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 688 aa = a->a; /* Nonzero elements stored row-by-row. */ 689 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 690 691 /* Call MKL sparse BLAS routine to do the MatMult. */ 692 if (zz == yy) { 693 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 694 beta = 1.0; 695 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 696 } else { 697 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 698 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 699 beta = 0.0; 700 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 701 for (i=0; i<n; i++) { 702 z[i] += y[i]; 703 } 704 } 705 706 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 707 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 708 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 #endif 712 713 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 714 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 715 { 716 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 717 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 718 const PetscScalar *x; 719 PetscScalar *y,*z; 720 PetscErrorCode ierr; 721 PetscInt n = A->cmap->n; 722 PetscInt i; 723 PetscObjectState state; 724 725 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 726 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 727 728 PetscFunctionBegin; 729 730 /* If there are no nonzero entries, set zz = yy and return immediately. */ 731 if(!a->nz) { 732 PetscInt i; 733 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 734 for (i=0; i<n; i++) { 735 z[i] = y[i]; 736 } 737 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 742 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 743 744 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 745 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 746 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 747 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 748 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 749 MatSeqAIJMKL_create_mkl_handle(A); 750 } 751 752 /* Call MKL sparse BLAS routine to do the MatMult. */ 753 if (zz == yy) { 754 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 755 * with alpha and beta both set to 1.0. */ 756 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 757 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 758 } else { 759 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 760 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 761 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 762 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 763 for (i=0; i<n; i++) { 764 z[i] += y[i]; 765 } 766 } 767 768 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 769 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 770 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } 773 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 774 775 /* -------------------------- MatProduct code -------------------------- */ 776 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 777 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 778 { 779 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr; 780 sparse_matrix_t csrA,csrB,csrC; 781 PetscInt nrows,ncols; 782 PetscErrorCode ierr; 783 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 784 struct matrix_descr descr_type_gen; 785 PetscObjectState state; 786 787 PetscFunctionBegin; 788 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 789 * not handle sparse matrices with zero rows or columns. */ 790 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 791 else nrows = A->cmap->N; 792 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 793 else ncols = B->rmap->N; 794 795 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 796 if (!a->sparse_optimized || a->state != state) { 797 MatSeqAIJMKL_create_mkl_handle(A); 798 } 799 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 800 if (!b->sparse_optimized || b->state != state) { 801 MatSeqAIJMKL_create_mkl_handle(B); 802 } 803 csrA = a->csrA; 804 csrB = b->csrA; 805 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 806 807 if (csrA && csrB) { 808 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 809 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply"); 810 } else { 811 csrC = PETSC_NULL; 812 } 813 814 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 815 816 PetscFunctionReturn(0); 817 } 818 819 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 820 { 821 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 822 sparse_matrix_t csrA, csrB, csrC; 823 PetscErrorCode ierr; 824 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 825 struct matrix_descr descr_type_gen; 826 PetscObjectState state; 827 828 PetscFunctionBegin; 829 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 830 if (!a->sparse_optimized || a->state != state) { 831 MatSeqAIJMKL_create_mkl_handle(A); 832 } 833 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 834 if (!b->sparse_optimized || b->state != state) { 835 MatSeqAIJMKL_create_mkl_handle(B); 836 } 837 csrA = a->csrA; 838 csrB = b->csrA; 839 csrC = c->csrA; 840 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 841 842 if (csrA && csrB) { 843 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 844 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 845 } else { 846 csrC = PETSC_NULL; 847 } 848 849 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 850 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 851 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 856 { 857 PetscErrorCode ierr; 858 859 PetscFunctionBegin; 860 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 865 { 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 874 { 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } 881 882 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 883 { 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 892 { 893 PetscErrorCode ierr; 894 895 PetscFunctionBegin; 896 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 901 { 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 909 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 910 { 911 PetscErrorCode ierr; 912 Mat_Product *product = C->product; 913 Mat A = product->A,B = product->B; 914 915 PetscFunctionBegin; 916 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 921 { 922 PetscErrorCode ierr; 923 Mat_Product *product = C->product; 924 Mat A = product->A,B = product->B; 925 PetscReal fill = product->fill; 926 927 PetscFunctionBegin; 928 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 929 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C) 934 { 935 Mat Ct; 936 Vec zeros; 937 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr; 938 sparse_matrix_t csrA, csrP, csrC; 939 PetscBool set, flag; 940 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 941 struct matrix_descr descr_type_sym; 942 PetscObjectState state; 943 PetscErrorCode ierr; 944 945 PetscFunctionBegin; 946 ierr = MatIsSymmetricKnown(A,&set,&flag); 947 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 948 949 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 950 if (!a->sparse_optimized || a->state != state) { 951 MatSeqAIJMKL_create_mkl_handle(A); 952 } 953 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 954 if (!p->sparse_optimized || p->state != state) { 955 MatSeqAIJMKL_create_mkl_handle(P); 956 } 957 csrA = a->csrA; 958 csrP = p->csrA; 959 csrC = c->csrA; 960 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 961 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 962 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 963 964 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 965 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 966 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 967 968 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 969 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 970 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 971 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 972 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 973 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 974 * the full matrix. */ 975 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 976 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 977 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 978 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 979 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 980 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 981 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 982 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 983 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 984 ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr); 985 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 986 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 987 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 992 { 993 Mat_Product *product = C->product; 994 Mat A = product->A,P = product->B; 995 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr; 996 sparse_matrix_t csrA,csrP,csrC; 997 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 998 struct matrix_descr descr_type_sym; 999 PetscObjectState state; 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 1004 if (!a->sparse_optimized || a->state != state) { 1005 MatSeqAIJMKL_create_mkl_handle(A); 1006 } 1007 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1008 if (!p->sparse_optimized || p->state != state) { 1009 MatSeqAIJMKL_create_mkl_handle(P); 1010 } 1011 csrA = a->csrA; 1012 csrP = p->csrA; 1013 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1014 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1015 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1016 1017 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1018 if (csrP && csrA) { 1019 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1020 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr"); 1021 } else { 1022 csrC = PETSC_NULL; 1023 } 1024 1025 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1026 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1027 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 1028 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 1029 * is guaranteed. */ 1030 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1031 1032 C->ops->productnumeric = MatProductNumeric_PtAP; 1033 PetscFunctionReturn(0); 1034 } 1035 1036 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1037 { 1038 PetscFunctionBegin; 1039 C->ops->productsymbolic = MatProductSymbolic_AB; 1040 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1041 PetscFunctionReturn(0); 1042 } 1043 1044 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1045 { 1046 PetscFunctionBegin; 1047 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1048 PetscFunctionReturn(0); 1049 } 1050 1051 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1052 { 1053 PetscFunctionBegin; 1054 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1055 C->ops->productsymbolic = MatProductSymbolic_ABt; 1056 PetscFunctionReturn(0); 1057 } 1058 1059 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1060 { 1061 PetscErrorCode ierr; 1062 Mat_Product *product = C->product; 1063 Mat A = product->A; 1064 PetscBool set, flag; 1065 1066 PetscFunctionBegin; 1067 #if defined(PETSC_USE_COMPLEX) 1068 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used. 1069 * We do this in several other locations in this file. This works for the time being, but the _Basic() 1070 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1071 * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */ 1072 C->ops->productsymbolic = NULL; 1073 #else 1074 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1075 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1076 if (set && flag) { 1077 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1078 PetscFunctionReturn(0); 1079 } else { 1080 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1081 } 1082 /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(), 1083 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1084 #endif 1085 PetscFunctionReturn(0); 1086 } 1087 1088 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1089 { 1090 PetscFunctionBegin; 1091 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1092 PetscFunctionReturn(0); 1093 } 1094 1095 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1096 { 1097 PetscFunctionBegin; 1098 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1099 PetscFunctionReturn(0); 1100 } 1101 1102 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1103 { 1104 PetscErrorCode ierr; 1105 Mat_Product *product = C->product; 1106 1107 PetscFunctionBegin; 1108 switch (product->type) { 1109 case MATPRODUCT_AB: 1110 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1111 break; 1112 case MATPRODUCT_AtB: 1113 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1114 break; 1115 case MATPRODUCT_ABt: 1116 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1117 break; 1118 case MATPRODUCT_PtAP: 1119 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1120 break; 1121 case MATPRODUCT_RARt: 1122 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1123 break; 1124 case MATPRODUCT_ABC: 1125 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1126 break; 1127 default: 1128 break; 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1133 /* ------------------------ End MatProduct code ------------------------ */ 1134 1135 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1136 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1137 * routine, but can also be used to convert an assembled SeqAIJ matrix 1138 * into a SeqAIJMKL one. */ 1139 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1140 { 1141 PetscErrorCode ierr; 1142 Mat B = *newmat; 1143 Mat_SeqAIJMKL *aijmkl; 1144 PetscBool set; 1145 PetscBool sametype; 1146 1147 PetscFunctionBegin; 1148 if (reuse == MAT_INITIAL_MATRIX) { 1149 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1150 } 1151 1152 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1153 if (sametype) PetscFunctionReturn(0); 1154 1155 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1156 B->spptr = (void*) aijmkl; 1157 1158 /* Set function pointers for methods that we inherit from AIJ but override. 1159 * We also parse some command line options below, since those determine some of the methods we point to. */ 1160 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1161 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1162 B->ops->destroy = MatDestroy_SeqAIJMKL; 1163 1164 aijmkl->sparse_optimized = PETSC_FALSE; 1165 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1166 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1167 #else 1168 aijmkl->no_SpMV2 = PETSC_TRUE; 1169 #endif 1170 aijmkl->eager_inspection = PETSC_FALSE; 1171 1172 /* Parse command line options. */ 1173 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1174 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1175 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1176 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1177 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1178 if(!aijmkl->no_SpMV2) { 1179 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1180 aijmkl->no_SpMV2 = PETSC_TRUE; 1181 } 1182 #endif 1183 1184 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1185 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1186 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1187 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1188 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1189 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1190 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1191 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1192 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1193 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1194 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1195 # if !defined(PETSC_USE_COMPLEX) 1196 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1197 # else 1198 B->ops->ptapnumeric = NULL; 1199 # endif 1200 # endif 1201 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1202 1203 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1204 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1205 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1206 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1207 * versions in which the older interface has not been deprecated, we use the old interface. */ 1208 if (aijmkl->no_SpMV2) { 1209 B->ops->mult = MatMult_SeqAIJMKL; 1210 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1211 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1212 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1213 } 1214 #endif 1215 1216 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1217 1218 if(!aijmkl->no_SpMV2) { 1219 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1220 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1221 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr); 1222 #endif 1223 #endif 1224 } 1225 1226 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1227 *newmat = B; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 /*@C 1232 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1233 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1234 routines from Intel MKL whenever possible. 1235 If the installed version of MKL supports the "SpMV2" sparse 1236 inspector-executor routines, then those are used by default. 1237 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1238 symmetric A) operations are currently supported. 1239 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1240 1241 Collective 1242 1243 Input Parameters: 1244 + comm - MPI communicator, set to PETSC_COMM_SELF 1245 . m - number of rows 1246 . n - number of columns 1247 . nz - number of nonzeros per row (same for all rows) 1248 - nnz - array containing the number of nonzeros in the various rows 1249 (possibly different for each row) or NULL 1250 1251 Output Parameter: 1252 . A - the matrix 1253 1254 Options Database Keys: 1255 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1256 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1257 1258 Notes: 1259 If nnz is given then nz is ignored 1260 1261 Level: intermediate 1262 1263 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1264 @*/ 1265 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1266 { 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1271 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1272 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1273 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1278 { 1279 PetscErrorCode ierr; 1280 1281 PetscFunctionBegin; 1282 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1283 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286