1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 if (!aijmkl->no_SpMV2) { 61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif 64 } 65 66 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67 * simply involves destroying the MKL sparse matrix handle and then freeing 68 * the spptr pointer. */ 69 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 70 71 if (aijmkl->sparse_optimized) { 72 sparse_status_t stat; 73 stat = mkl_sparse_destroy(aijmkl->csrA); 74 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 75 } 76 #endif 77 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 78 79 /* Change the type of B to MATSEQAIJ. */ 80 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 81 82 *newmat = B; 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 87 { 88 PetscErrorCode ierr; 89 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 90 91 PetscFunctionBegin; 92 93 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 94 * spptr pointer. */ 95 if (aijmkl) { 96 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 97 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 98 if (aijmkl->sparse_optimized) { 99 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 100 stat = mkl_sparse_destroy(aijmkl->csrA); 101 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 102 } 103 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 104 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 105 } 106 107 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 108 * to destroy everything that remains. */ 109 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 110 /* Note that I don't call MatSetType(). I believe this is because that 111 * is only to be called when *building* a matrix. I could be wrong, but 112 * that is how things work for the SuperLU matrix class. */ 113 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 118 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 119 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 120 * handle, creates a new one, and then calls mkl_sparse_optimize(). 121 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 122 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 123 * an unoptimized matrix handle here. */ 124 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 125 { 126 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 127 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 128 * does nothing. We make it callable anyway in this case because it cuts 129 * down on littering the code with #ifdefs. */ 130 PetscFunctionBegin; 131 PetscFunctionReturn(0); 132 #else 133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 134 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 135 PetscInt m,n; 136 MatScalar *aa; 137 PetscInt *aj,*ai; 138 sparse_status_t stat; 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 143 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 144 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 145 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 146 * mkl_sparse_optimize() later. */ 147 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 148 #endif 149 150 if (aijmkl->sparse_optimized) { 151 /* Matrix has been previously assembled and optimized. Must destroy old 152 * matrix handle before running the optimization step again. */ 153 stat = mkl_sparse_destroy(aijmkl->csrA); 154 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 155 } 156 aijmkl->sparse_optimized = PETSC_FALSE; 157 158 /* Now perform the SpMV2 setup and matrix optimization. */ 159 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 162 m = A->rmap->n; 163 n = A->cmap->n; 164 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165 aa = a->a; /* Nonzero elements stored row-by-row. */ 166 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 167 if ((a->nz!=0) && aa && !(A->structure_only)) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 172 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 174 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 176 if (!aijmkl->no_SpMV2) { 177 stat = mkl_sparse_optimize(aijmkl->csrA); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 179 } 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 182 } else { 183 aijmkl->csrA = PETSC_NULL; 184 } 185 186 PetscFunctionReturn(0); 187 #endif 188 } 189 190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 191 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 192 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 193 { 194 PetscErrorCode ierr; 195 sparse_status_t stat; 196 sparse_index_base_t indexing; 197 PetscInt m,n; 198 PetscInt *aj,*ai,*dummy; 199 MatScalar *aa; 200 Mat_SeqAIJMKL *aijmkl; 201 202 if (csrA) { 203 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 204 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 205 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 206 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 207 } else { 208 aj = ai = PETSC_NULL; 209 aa = PETSC_NULL; 210 } 211 212 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 213 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 214 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 215 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 216 * they will be destroyed when the MKL handle is destroyed. 217 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 218 if (csrA) { 219 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 220 } else { 221 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 222 ierr = MatSetUp(A);CHKERRQ(ierr); 223 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 } 226 227 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 228 * Now turn it into a MATSEQAIJMKL. */ 229 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 230 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 aijmkl->csrA = csrA; 233 234 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 235 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 236 * and just need to be able to run the MKL optimization step. */ 237 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 238 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 239 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 240 if (csrA) { 241 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 242 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 243 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 244 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 245 } 246 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 247 248 PetscFunctionReturn(0); 249 } 250 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 251 252 253 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 254 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 255 * MatMatMultNumeric(). */ 256 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 257 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 258 { 259 PetscInt i; 260 PetscInt nrows,ncols; 261 PetscInt nz; 262 PetscInt *ai,*aj,*dummy; 263 PetscScalar *aa; 264 PetscErrorCode ierr; 265 Mat_SeqAIJMKL *aijmkl; 266 sparse_status_t stat; 267 sparse_index_base_t indexing; 268 269 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 270 271 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 272 if (!aijmkl->csrA) PetscFunctionReturn(0); 273 274 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 275 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 276 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 277 278 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 279 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 280 for (i=0; i<nrows; i++) { 281 nz = ai[i+1] - ai[i]; 282 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 283 } 284 285 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 288 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 289 /* We mark our matrix as having a valid, optimized MKL handle. 290 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 291 aijmkl->sparse_optimized = PETSC_TRUE; 292 293 PetscFunctionReturn(0); 294 } 295 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 296 297 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 298 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer) 299 { 300 PetscInt i,j,k; 301 PetscInt nrows,ncols; 302 PetscInt nz; 303 PetscInt *ai,*aj,*dummy; 304 PetscScalar *aa; 305 PetscErrorCode ierr; 306 Mat_SeqAIJMKL *aijmkl; 307 sparse_status_t stat; 308 sparse_index_base_t indexing; 309 310 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 311 312 ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr); 313 314 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 315 if (!aijmkl->csrA) { 316 ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 321 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 322 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 323 324 k = 0; 325 for (i=0; i<nrows; i++) { 326 ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr); 327 nz = ai[i+1] - ai[i]; 328 for (j=0; j<nz; j++) { 329 if (aa) { 330 ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],aa[k]);CHKERRQ(ierr); 331 } else { 332 ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr); 333 } 334 k++; 335 } 336 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 337 } 338 PetscFunctionReturn(0); 339 } 340 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 341 342 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 343 { 344 PetscErrorCode ierr; 345 Mat_SeqAIJMKL *aijmkl; 346 Mat_SeqAIJMKL *aijmkl_dest; 347 348 PetscFunctionBegin; 349 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 350 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 351 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 352 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 353 aijmkl_dest->sparse_optimized = PETSC_FALSE; 354 if (aijmkl->eager_inspection) { 355 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 356 } 357 PetscFunctionReturn(0); 358 } 359 360 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 361 { 362 PetscErrorCode ierr; 363 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 364 Mat_SeqAIJMKL *aijmkl; 365 366 PetscFunctionBegin; 367 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 368 369 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 370 * extra information and some different methods, call the AssemblyEnd 371 * routine for a MATSEQAIJ. 372 * I'm not sure if this is the best way to do this, but it avoids 373 * a lot of code duplication. */ 374 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 375 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 376 377 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 378 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 379 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 380 if (aijmkl->eager_inspection) { 381 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 382 } 383 384 PetscFunctionReturn(0); 385 } 386 387 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 388 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 389 { 390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 391 const PetscScalar *x; 392 PetscScalar *y; 393 const MatScalar *aa; 394 PetscErrorCode ierr; 395 PetscInt m = A->rmap->n; 396 PetscInt n = A->cmap->n; 397 PetscScalar alpha = 1.0; 398 PetscScalar beta = 0.0; 399 const PetscInt *aj,*ai; 400 char matdescra[6]; 401 402 403 /* Variables not in MatMult_SeqAIJ. */ 404 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 405 406 PetscFunctionBegin; 407 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 408 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 409 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 410 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 411 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 412 aa = a->a; /* Nonzero elements stored row-by-row. */ 413 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 414 415 /* Call MKL sparse BLAS routine to do the MatMult. */ 416 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 417 418 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 #endif 424 425 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 426 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 427 { 428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 429 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 430 const PetscScalar *x; 431 PetscScalar *y; 432 PetscErrorCode ierr; 433 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 434 PetscObjectState state; 435 436 PetscFunctionBegin; 437 438 /* If there are no nonzero entries, zero yy and return immediately. */ 439 if(!a->nz) { 440 PetscInt i; 441 PetscInt m=A->rmap->n; 442 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 443 for (i=0; i<m; i++) { 444 y[i] = 0.0; 445 } 446 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 451 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 452 453 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 454 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 455 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 456 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 457 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 458 MatSeqAIJMKL_create_mkl_handle(A); 459 } 460 461 /* Call MKL SpMV2 executor routine to do the MatMult. */ 462 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 463 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 464 465 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 466 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 467 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 471 472 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 473 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 474 { 475 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 476 const PetscScalar *x; 477 PetscScalar *y; 478 const MatScalar *aa; 479 PetscErrorCode ierr; 480 PetscInt m = A->rmap->n; 481 PetscInt n = A->cmap->n; 482 PetscScalar alpha = 1.0; 483 PetscScalar beta = 0.0; 484 const PetscInt *aj,*ai; 485 char matdescra[6]; 486 487 /* Variables not in MatMultTranspose_SeqAIJ. */ 488 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 489 490 PetscFunctionBegin; 491 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 492 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 493 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 495 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 496 aa = a->a; /* Nonzero elements stored row-by-row. */ 497 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 498 499 /* Call MKL sparse BLAS routine to do the MatMult. */ 500 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 501 502 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 503 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 #endif 508 509 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 510 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 511 { 512 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 513 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 514 const PetscScalar *x; 515 PetscScalar *y; 516 PetscErrorCode ierr; 517 sparse_status_t stat; 518 PetscObjectState state; 519 520 PetscFunctionBegin; 521 522 /* If there are no nonzero entries, zero yy and return immediately. */ 523 if(!a->nz) { 524 PetscInt i; 525 PetscInt n=A->cmap->n; 526 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527 for (i=0; i<n; i++) { 528 y[i] = 0.0; 529 } 530 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 535 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 536 537 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 538 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 539 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 540 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 541 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 542 MatSeqAIJMKL_create_mkl_handle(A); 543 } 544 545 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 546 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 547 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 548 549 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 550 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 551 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 552 PetscFunctionReturn(0); 553 } 554 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 555 556 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 557 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 558 { 559 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 560 const PetscScalar *x; 561 PetscScalar *y,*z; 562 const MatScalar *aa; 563 PetscErrorCode ierr; 564 PetscInt m = A->rmap->n; 565 PetscInt n = A->cmap->n; 566 const PetscInt *aj,*ai; 567 PetscInt i; 568 569 /* Variables not in MatMultAdd_SeqAIJ. */ 570 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 571 PetscScalar alpha = 1.0; 572 PetscScalar beta; 573 char matdescra[6]; 574 575 PetscFunctionBegin; 576 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 577 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 578 579 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 580 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 581 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 582 aa = a->a; /* Nonzero elements stored row-by-row. */ 583 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 584 585 /* Call MKL sparse BLAS routine to do the MatMult. */ 586 if (zz == yy) { 587 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 588 beta = 1.0; 589 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 590 } else { 591 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 592 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 593 beta = 0.0; 594 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 595 for (i=0; i<m; i++) { 596 z[i] += y[i]; 597 } 598 } 599 600 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 601 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 602 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 #endif 606 607 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 608 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 609 { 610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 611 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 612 const PetscScalar *x; 613 PetscScalar *y,*z; 614 PetscErrorCode ierr; 615 PetscInt m = A->rmap->n; 616 PetscInt i; 617 618 /* Variables not in MatMultAdd_SeqAIJ. */ 619 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 620 PetscObjectState state; 621 622 PetscFunctionBegin; 623 624 /* If there are no nonzero entries, set zz = yy and return immediately. */ 625 if(!a->nz) { 626 PetscInt i; 627 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 628 for (i=0; i<m; i++) { 629 z[i] = y[i]; 630 } 631 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 636 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 637 638 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 639 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 640 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 641 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 642 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 643 MatSeqAIJMKL_create_mkl_handle(A); 644 } 645 646 /* Call MKL sparse BLAS routine to do the MatMult. */ 647 if (zz == yy) { 648 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 649 * with alpha and beta both set to 1.0. */ 650 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 651 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 652 } else { 653 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 654 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 655 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 656 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 657 for (i=0; i<m; i++) { 658 z[i] += y[i]; 659 } 660 } 661 662 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 663 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 664 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 668 669 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 670 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 671 { 672 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 673 const PetscScalar *x; 674 PetscScalar *y,*z; 675 const MatScalar *aa; 676 PetscErrorCode ierr; 677 PetscInt m = A->rmap->n; 678 PetscInt n = A->cmap->n; 679 const PetscInt *aj,*ai; 680 PetscInt i; 681 682 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 683 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 684 PetscScalar alpha = 1.0; 685 PetscScalar beta; 686 char matdescra[6]; 687 688 PetscFunctionBegin; 689 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 690 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 691 692 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 693 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 694 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 695 aa = a->a; /* Nonzero elements stored row-by-row. */ 696 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 697 698 /* Call MKL sparse BLAS routine to do the MatMult. */ 699 if (zz == yy) { 700 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 701 beta = 1.0; 702 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 703 } else { 704 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 705 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 706 beta = 0.0; 707 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 708 for (i=0; i<n; i++) { 709 z[i] += y[i]; 710 } 711 } 712 713 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 714 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 715 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 #endif 719 720 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 721 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 722 { 723 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 724 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 725 const PetscScalar *x; 726 PetscScalar *y,*z; 727 PetscErrorCode ierr; 728 PetscInt n = A->cmap->n; 729 PetscInt i; 730 PetscObjectState state; 731 732 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 733 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 734 735 PetscFunctionBegin; 736 737 /* If there are no nonzero entries, set zz = yy and return immediately. */ 738 if(!a->nz) { 739 PetscInt i; 740 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 741 for (i=0; i<n; i++) { 742 z[i] = y[i]; 743 } 744 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 749 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 750 751 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 752 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 753 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 754 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 755 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 756 MatSeqAIJMKL_create_mkl_handle(A); 757 } 758 759 /* Call MKL sparse BLAS routine to do the MatMult. */ 760 if (zz == yy) { 761 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 762 * with alpha and beta both set to 1.0. */ 763 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 764 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 765 } else { 766 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 767 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 768 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 769 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 770 for (i=0; i<n; i++) { 771 z[i] += y[i]; 772 } 773 } 774 775 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 776 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 777 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 781 782 /* -------------------------- MatProduct code -------------------------- */ 783 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 784 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 785 { 786 Mat_SeqAIJMKL *a, *b; 787 sparse_matrix_t csrA, csrB, csrC; 788 PetscInt nrows,ncols; 789 PetscErrorCode ierr; 790 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 791 struct matrix_descr descr_type_gen; 792 PetscObjectState state; 793 794 PetscFunctionBegin; 795 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 796 * not handle sparse matrices with zero rows or columns. */ 797 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 798 else nrows = A->cmap->N; 799 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 800 else ncols = B->rmap->N; 801 802 a = (Mat_SeqAIJMKL*)A->spptr; 803 b = (Mat_SeqAIJMKL*)B->spptr; 804 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 805 if (!a->sparse_optimized || a->state != state) { 806 MatSeqAIJMKL_create_mkl_handle(A); 807 } 808 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 809 if (!b->sparse_optimized || b->state != state) { 810 MatSeqAIJMKL_create_mkl_handle(B); 811 } 812 csrA = a->csrA; 813 csrB = b->csrA; 814 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 815 816 if (csrA && csrB) { 817 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 818 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply"); 819 } else { 820 csrC = PETSC_NULL; 821 } 822 823 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 824 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 829 { 830 Mat_SeqAIJMKL *a, *b, *c; 831 sparse_matrix_t csrA, csrB, csrC; 832 PetscErrorCode ierr; 833 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 834 struct matrix_descr descr_type_gen; 835 PetscObjectState state; 836 837 PetscFunctionBegin; 838 a = (Mat_SeqAIJMKL*)A->spptr; 839 b = (Mat_SeqAIJMKL*)B->spptr; 840 c = (Mat_SeqAIJMKL*)C->spptr; 841 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 842 if (!a->sparse_optimized || a->state != state) { 843 MatSeqAIJMKL_create_mkl_handle(A); 844 } 845 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 846 if (!b->sparse_optimized || b->state != state) { 847 MatSeqAIJMKL_create_mkl_handle(B); 848 } 849 csrA = a->csrA; 850 csrB = b->csrA; 851 csrC = c->csrA; 852 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 853 854 if (csrA && csrB) { 855 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 856 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 857 } else { 858 csrC = PETSC_NULL; 859 } 860 861 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 862 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 863 864 PetscFunctionReturn(0); 865 } 866 867 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 868 { 869 PetscErrorCode ierr; 870 871 PetscFunctionBegin; 872 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 877 { 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 885 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 886 { 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 895 { 896 PetscErrorCode ierr; 897 898 PetscFunctionBegin; 899 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 904 { 905 PetscErrorCode ierr; 906 907 PetscFunctionBegin; 908 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 913 { 914 PetscErrorCode ierr; 915 916 PetscFunctionBegin; 917 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 922 { 923 PetscErrorCode ierr; 924 Mat_Product *product = C->product; 925 Mat A = product->A,B = product->B; 926 927 PetscFunctionBegin; 928 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 932 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 933 { 934 PetscErrorCode ierr; 935 Mat_Product *product = C->product; 936 Mat A = product->A,B = product->B; 937 PetscReal fill = product->fill; 938 939 PetscFunctionBegin; 940 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 941 942 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 943 PetscFunctionReturn(0); 944 } 945 946 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat P,Mat C) 947 { 948 Mat Ct; 949 Vec zeros; 950 Mat_SeqAIJMKL *a, *p, *c; 951 sparse_matrix_t csrA, csrP, csrC; 952 PetscBool set, flag; 953 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 954 struct matrix_descr descr_type_sym; 955 PetscObjectState state; 956 PetscErrorCode ierr; 957 958 PetscFunctionBegin; 959 ierr = MatIsSymmetricKnown(A,&set,&flag); 960 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL() called on matrix A not marked as symmetric"); 961 962 a = (Mat_SeqAIJMKL*)A->spptr; 963 p = (Mat_SeqAIJMKL*)P->spptr; 964 c = (Mat_SeqAIJMKL*)C->spptr; 965 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 966 if (!a->sparse_optimized || a->state != state) { 967 MatSeqAIJMKL_create_mkl_handle(A); 968 } 969 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 970 if (!p->sparse_optimized || p->state != state) { 971 MatSeqAIJMKL_create_mkl_handle(P); 972 } 973 csrA = a->csrA; 974 csrP = p->csrA; 975 csrC = c->csrA; 976 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 977 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 978 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 979 980 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 981 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 982 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 983 984 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 985 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 986 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 987 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 988 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 989 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 990 * the full matrix. */ 991 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 992 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 993 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 994 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 995 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 996 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 997 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 998 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 999 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 1000 C->product->type = MATPRODUCT_PtAP; 1001 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 1002 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 1003 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 1004 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 1009 { 1010 Mat_Product *product = C->product; 1011 Mat A = product->A,P = product->B; 1012 Mat_SeqAIJMKL *a,*p; 1013 sparse_matrix_t csrA,csrP,csrC; 1014 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 1015 struct matrix_descr descr_type_sym; 1016 PetscObjectState state; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 a = (Mat_SeqAIJMKL*)A->spptr; 1021 p = (Mat_SeqAIJMKL*)P->spptr; 1022 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 1023 if (!a->sparse_optimized || a->state != state) { 1024 MatSeqAIJMKL_create_mkl_handle(A); 1025 } 1026 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 1027 if (!p->sparse_optimized || p->state != state) { 1028 MatSeqAIJMKL_create_mkl_handle(P); 1029 } 1030 csrA = a->csrA; 1031 csrP = p->csrA; 1032 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 1033 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 1034 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 1035 1036 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 1037 if (csrP && csrA) { 1038 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 1039 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr"); 1040 } else { 1041 csrC = PETSC_NULL; 1042 } 1043 1044 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1045 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1046 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(). I believe that leaving things 1047 * in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this is 1048 * guaranteed. */ 1049 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1050 1051 C->ops->productnumeric = MatProductNumeric_PtAP; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1056 { 1057 PetscFunctionBegin; 1058 C->ops->productsymbolic = MatProductSymbolic_AB; 1059 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1064 { 1065 PetscFunctionBegin; 1066 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1067 PetscFunctionReturn(0); 1068 } 1069 1070 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1071 { 1072 PetscFunctionBegin; 1073 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1074 C->ops->productsymbolic = MatProductSymbolic_ABt; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1079 { 1080 PetscErrorCode ierr; 1081 Mat_Product *product = C->product; 1082 Mat A = product->A; 1083 PetscBool set, flag; 1084 1085 PetscFunctionBegin; 1086 #if defined(PETSC_USE_COMPLEX) 1087 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used. 1088 * We do this in several other locations in this file. This works for the time being, but the _Basic() 1089 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1090 * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */ 1091 C->ops->productsymbolic = NULL; 1092 #else 1093 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1094 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1095 if (set && flag) { 1096 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1097 PetscFunctionReturn(0); 1098 } else { 1099 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1100 } 1101 /* Note that we don't set C->ops->productnumeric here, as this has must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL(), 1102 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1103 #endif 1104 PetscFunctionReturn(0); 1105 } 1106 1107 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1108 { 1109 PetscFunctionBegin; 1110 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1111 PetscFunctionReturn(0); 1112 } 1113 1114 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1115 { 1116 PetscFunctionBegin; 1117 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1122 { 1123 PetscErrorCode ierr; 1124 Mat_Product *product = C->product; 1125 1126 PetscFunctionBegin; 1127 switch (product->type) { 1128 case MATPRODUCT_AB: 1129 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1130 break; 1131 case MATPRODUCT_AtB: 1132 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1133 break; 1134 case MATPRODUCT_ABt: 1135 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1136 break; 1137 case MATPRODUCT_PtAP: 1138 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1139 break; 1140 case MATPRODUCT_RARt: 1141 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1142 break; 1143 case MATPRODUCT_ABC: 1144 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1145 break; 1146 default: 1147 break; 1148 } 1149 PetscFunctionReturn(0); 1150 } 1151 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1152 /* ------------------------ End MatProduct code ------------------------ */ 1153 1154 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1155 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1156 * routine, but can also be used to convert an assembled SeqAIJ matrix 1157 * into a SeqAIJMKL one. */ 1158 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1159 { 1160 PetscErrorCode ierr; 1161 Mat B = *newmat; 1162 Mat_SeqAIJMKL *aijmkl; 1163 PetscBool set; 1164 PetscBool sametype; 1165 1166 PetscFunctionBegin; 1167 if (reuse == MAT_INITIAL_MATRIX) { 1168 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1169 } 1170 1171 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1172 if (sametype) PetscFunctionReturn(0); 1173 1174 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1175 B->spptr = (void*) aijmkl; 1176 1177 /* Set function pointers for methods that we inherit from AIJ but override. 1178 * We also parse some command line options below, since those determine some of the methods we point to. */ 1179 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1180 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1181 B->ops->destroy = MatDestroy_SeqAIJMKL; 1182 1183 aijmkl->sparse_optimized = PETSC_FALSE; 1184 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1185 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1186 #else 1187 aijmkl->no_SpMV2 = PETSC_TRUE; 1188 #endif 1189 aijmkl->eager_inspection = PETSC_FALSE; 1190 1191 /* Parse command line options. */ 1192 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1193 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1194 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1195 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1196 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1197 if(!aijmkl->no_SpMV2) { 1198 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1199 aijmkl->no_SpMV2 = PETSC_TRUE; 1200 } 1201 #endif 1202 1203 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1204 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1205 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1206 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1207 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1208 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1209 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1210 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1211 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1212 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1213 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1214 # if !defined(PETSC_USE_COMPLEX) 1215 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL; 1216 # else 1217 B->ops->ptapnumeric = NULL; 1218 # endif 1219 # endif 1220 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1221 1222 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1223 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1224 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1225 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1226 * versions in which the older interface has not been deprecated, we use the old interface. */ 1227 if (aijmkl->no_SpMV2) { 1228 B->ops->mult = MatMult_SeqAIJMKL; 1229 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1230 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1231 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1232 } 1233 #endif 1234 1235 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1236 1237 if(!aijmkl->no_SpMV2) { 1238 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1239 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1240 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr); 1241 #endif 1242 #endif 1243 } 1244 1245 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1246 *newmat = B; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 /*@C 1251 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1252 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1253 routines from Intel MKL whenever possible. 1254 If the installed version of MKL supports the "SpMV2" sparse 1255 inspector-executor routines, then those are used by default. 1256 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1257 symmetric A) operations are currently supported. 1258 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1259 1260 Collective 1261 1262 Input Parameters: 1263 + comm - MPI communicator, set to PETSC_COMM_SELF 1264 . m - number of rows 1265 . n - number of columns 1266 . nz - number of nonzeros per row (same for all rows) 1267 - nnz - array containing the number of nonzeros in the various rows 1268 (possibly different for each row) or NULL 1269 1270 Output Parameter: 1271 . A - the matrix 1272 1273 Options Database Keys: 1274 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1275 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1276 1277 Notes: 1278 If nnz is given then nz is ignored 1279 1280 Level: intermediate 1281 1282 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1283 @*/ 1284 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1285 { 1286 PetscErrorCode ierr; 1287 1288 PetscFunctionBegin; 1289 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1290 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1291 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1292 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1297 { 1298 PetscErrorCode ierr; 1299 1300 PetscFunctionBegin; 1301 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1302 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305