1 2 /* 3 Defines basic operations for the MATSEQAIJMKL matrix class. 4 This class is derived from the MATSEQAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but uses 6 sparse BLAS operations from the Intel Math Kernel Library (MKL) 7 wherever possible. 8 */ 9 10 #include <../src/mat/impls/aij/seq/aij.h> 11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12 #include <mkl_spblas.h> 13 14 typedef struct { 15 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 16 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 PetscObjectState state; 19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 20 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 21 struct matrix_descr descr; 22 #endif 23 } Mat_SeqAIJMKL; 24 25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 PetscErrorCode ierr; 32 Mat B = *newmat; 33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 34 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 35 #endif 36 37 PetscFunctionBegin; 38 if (reuse == MAT_INITIAL_MATRIX) { 39 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 51 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 52 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 53 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 54 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 55 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 56 57 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 58 59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 60 if (!aijmkl->no_SpMV2) { 61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 62 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr); 63 #endif 64 } 65 66 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 67 * simply involves destroying the MKL sparse matrix handle and then freeing 68 * the spptr pointer. */ 69 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr; 70 71 if (aijmkl->sparse_optimized) { 72 sparse_status_t stat; 73 stat = mkl_sparse_destroy(aijmkl->csrA); 74 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize"); 75 } 76 #endif 77 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 78 79 /* Change the type of B to MATSEQAIJ. */ 80 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 81 82 *newmat = B; 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 87 { 88 PetscErrorCode ierr; 89 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 90 91 PetscFunctionBegin; 92 93 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 94 * spptr pointer. */ 95 if (aijmkl) { 96 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 97 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 98 if (aijmkl->sparse_optimized) { 99 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 100 stat = mkl_sparse_destroy(aijmkl->csrA); 101 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 102 } 103 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 104 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 105 } 106 107 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 108 * to destroy everything that remains. */ 109 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 110 /* Note that I don't call MatSetType(). I believe this is because that 111 * is only to be called when *building* a matrix. I could be wrong, but 112 * that is how things work for the SuperLU matrix class. */ 113 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 118 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 119 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 120 * handle, creates a new one, and then calls mkl_sparse_optimize(). 121 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 122 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 123 * an unoptimized matrix handle here. */ 124 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 125 { 126 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 127 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 128 * does nothing. We make it callable anyway in this case because it cuts 129 * down on littering the code with #ifdefs. */ 130 PetscFunctionBegin; 131 PetscFunctionReturn(0); 132 #else 133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 134 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 135 PetscInt m,n; 136 MatScalar *aa; 137 PetscInt *aj,*ai; 138 sparse_status_t stat; 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 143 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 144 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 145 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 146 * mkl_sparse_optimize() later. */ 147 if (aijmkl->no_SpMV2) PetscFunctionReturn(0); 148 #endif 149 150 if (aijmkl->sparse_optimized) { 151 /* Matrix has been previously assembled and optimized. Must destroy old 152 * matrix handle before running the optimization step again. */ 153 stat = mkl_sparse_destroy(aijmkl->csrA); 154 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy"); 155 } 156 aijmkl->sparse_optimized = PETSC_FALSE; 157 158 /* Now perform the SpMV2 setup and matrix optimization. */ 159 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 160 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 161 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 162 m = A->rmap->n; 163 n = A->cmap->n; 164 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 165 aa = a->a; /* Nonzero elements stored row-by-row. */ 166 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 167 if ((a->nz!=0) && aa && !(A->structure_only)) { 168 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 169 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 170 stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 171 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle"); 172 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 173 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 174 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 175 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 176 if (!aijmkl->no_SpMV2) { 177 stat = mkl_sparse_optimize(aijmkl->csrA); 178 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 179 } 180 aijmkl->sparse_optimized = PETSC_TRUE; 181 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 182 } else { 183 aijmkl->csrA = PETSC_NULL; 184 } 185 186 PetscFunctionReturn(0); 187 #endif 188 } 189 190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 191 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 192 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A) 193 { 194 PetscErrorCode ierr; 195 sparse_status_t stat; 196 sparse_index_base_t indexing; 197 PetscInt m,n; 198 PetscInt *aj,*ai,*dummy; 199 MatScalar *aa; 200 Mat_SeqAIJMKL *aijmkl; 201 202 if (csrA) { 203 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 204 stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa); 205 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 206 if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 207 } else { 208 aj = ai = PETSC_NULL; 209 aa = PETSC_NULL; 210 } 211 212 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 213 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr); 214 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 215 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 216 * they will be destroyed when the MKL handle is destroyed. 217 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 218 if (csrA) { 219 ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr); 220 } else { 221 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 222 ierr = MatSetUp(A);CHKERRQ(ierr); 223 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 } 226 227 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 228 * Now turn it into a MATSEQAIJMKL. */ 229 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 230 231 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 232 aijmkl->csrA = csrA; 233 234 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 235 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 236 * and just need to be able to run the MKL optimization step. */ 237 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 238 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 239 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 240 if (csrA) { 241 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 242 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint"); 243 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 244 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint"); 245 } 246 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 247 248 PetscFunctionReturn(0); 249 } 250 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 251 252 253 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 254 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 255 * MatMatMultNumeric(). */ 256 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 257 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 258 { 259 PetscInt i; 260 PetscInt nrows,ncols; 261 PetscInt nz; 262 PetscInt *ai,*aj,*dummy; 263 PetscScalar *aa; 264 PetscErrorCode ierr; 265 Mat_SeqAIJMKL *aijmkl; 266 sparse_status_t stat; 267 sparse_index_base_t indexing; 268 269 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 270 271 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 272 if (!aijmkl->csrA) PetscFunctionReturn(0); 273 274 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 275 stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa); 276 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()"); 277 278 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 279 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 280 for (i=0; i<nrows; i++) { 281 nz = ai[i+1] - ai[i]; 282 ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr); 283 } 284 285 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 288 ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr); 289 /* We mark our matrix as having a valid, optimized MKL handle. 290 * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */ 291 aijmkl->sparse_optimized = PETSC_TRUE; 292 293 PetscFunctionReturn(0); 294 } 295 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 296 297 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 298 { 299 PetscErrorCode ierr; 300 Mat_SeqAIJMKL *aijmkl; 301 Mat_SeqAIJMKL *aijmkl_dest; 302 303 PetscFunctionBegin; 304 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 305 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 306 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 307 ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr); 308 aijmkl_dest->sparse_optimized = PETSC_FALSE; 309 if (aijmkl->eager_inspection) { 310 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 311 } 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 316 { 317 PetscErrorCode ierr; 318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 319 Mat_SeqAIJMKL *aijmkl; 320 321 PetscFunctionBegin; 322 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 323 324 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 325 * extra information and some different methods, call the AssemblyEnd 326 * routine for a MATSEQAIJ. 327 * I'm not sure if this is the best way to do this, but it avoids 328 * a lot of code duplication. */ 329 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 330 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 331 332 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 333 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 334 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 335 if (aijmkl->eager_inspection) { 336 ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr); 337 } 338 339 PetscFunctionReturn(0); 340 } 341 342 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 343 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 344 { 345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 346 const PetscScalar *x; 347 PetscScalar *y; 348 const MatScalar *aa; 349 PetscErrorCode ierr; 350 PetscInt m = A->rmap->n; 351 PetscInt n = A->cmap->n; 352 PetscScalar alpha = 1.0; 353 PetscScalar beta = 0.0; 354 const PetscInt *aj,*ai; 355 char matdescra[6]; 356 357 358 /* Variables not in MatMult_SeqAIJ. */ 359 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 360 361 PetscFunctionBegin; 362 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 363 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 364 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 365 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 366 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 367 aa = a->a; /* Nonzero elements stored row-by-row. */ 368 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 369 370 /* Call MKL sparse BLAS routine to do the MatMult. */ 371 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 372 373 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 375 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 #endif 379 380 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 381 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 382 { 383 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 384 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 385 const PetscScalar *x; 386 PetscScalar *y; 387 PetscErrorCode ierr; 388 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 389 PetscObjectState state; 390 391 PetscFunctionBegin; 392 393 /* If there are no nonzero entries, zero yy and return immediately. */ 394 if(!a->nz) { 395 PetscInt i; 396 PetscInt m=A->rmap->n; 397 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 398 for (i=0; i<m; i++) { 399 y[i] = 0.0; 400 } 401 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 406 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 407 408 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 409 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 410 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 411 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 412 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 413 MatSeqAIJMKL_create_mkl_handle(A); 414 } 415 416 /* Call MKL SpMV2 executor routine to do the MatMult. */ 417 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 418 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 419 420 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 422 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 426 427 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 428 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 429 { 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431 const PetscScalar *x; 432 PetscScalar *y; 433 const MatScalar *aa; 434 PetscErrorCode ierr; 435 PetscInt m = A->rmap->n; 436 PetscInt n = A->cmap->n; 437 PetscScalar alpha = 1.0; 438 PetscScalar beta = 0.0; 439 const PetscInt *aj,*ai; 440 char matdescra[6]; 441 442 /* Variables not in MatMultTranspose_SeqAIJ. */ 443 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 444 445 PetscFunctionBegin; 446 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 447 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 448 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 450 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 451 aa = a->a; /* Nonzero elements stored row-by-row. */ 452 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 453 454 /* Call MKL sparse BLAS routine to do the MatMult. */ 455 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 456 457 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 458 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 #endif 463 464 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 465 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 466 { 467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 468 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 469 const PetscScalar *x; 470 PetscScalar *y; 471 PetscErrorCode ierr; 472 sparse_status_t stat; 473 PetscObjectState state; 474 475 PetscFunctionBegin; 476 477 /* If there are no nonzero entries, zero yy and return immediately. */ 478 if(!a->nz) { 479 PetscInt i; 480 PetscInt n=A->cmap->n; 481 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 482 for (i=0; i<n; i++) { 483 y[i] = 0.0; 484 } 485 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 490 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 491 492 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 493 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 494 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 495 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 496 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 497 MatSeqAIJMKL_create_mkl_handle(A); 498 } 499 500 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 501 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 502 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 503 504 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 506 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 510 511 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 512 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 513 { 514 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 515 const PetscScalar *x; 516 PetscScalar *y,*z; 517 const MatScalar *aa; 518 PetscErrorCode ierr; 519 PetscInt m = A->rmap->n; 520 PetscInt n = A->cmap->n; 521 const PetscInt *aj,*ai; 522 PetscInt i; 523 524 /* Variables not in MatMultAdd_SeqAIJ. */ 525 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 526 PetscScalar alpha = 1.0; 527 PetscScalar beta; 528 char matdescra[6]; 529 530 PetscFunctionBegin; 531 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 532 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 533 534 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 535 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 536 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 537 aa = a->a; /* Nonzero elements stored row-by-row. */ 538 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 539 540 /* Call MKL sparse BLAS routine to do the MatMult. */ 541 if (zz == yy) { 542 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 543 beta = 1.0; 544 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 545 } else { 546 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 547 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 548 beta = 0.0; 549 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 550 for (i=0; i<m; i++) { 551 z[i] += y[i]; 552 } 553 } 554 555 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 556 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 557 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 #endif 561 562 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 563 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 566 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 567 const PetscScalar *x; 568 PetscScalar *y,*z; 569 PetscErrorCode ierr; 570 PetscInt m = A->rmap->n; 571 PetscInt i; 572 573 /* Variables not in MatMultAdd_SeqAIJ. */ 574 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 575 PetscObjectState state; 576 577 PetscFunctionBegin; 578 579 /* If there are no nonzero entries, set zz = yy and return immediately. */ 580 if(!a->nz) { 581 PetscInt i; 582 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 583 for (i=0; i<m; i++) { 584 z[i] = y[i]; 585 } 586 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 591 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 592 593 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 594 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 595 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 596 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 597 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 598 MatSeqAIJMKL_create_mkl_handle(A); 599 } 600 601 /* Call MKL sparse BLAS routine to do the MatMult. */ 602 if (zz == yy) { 603 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 604 * with alpha and beta both set to 1.0. */ 605 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 606 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 607 } else { 608 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 609 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 610 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 611 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 612 for (i=0; i<m; i++) { 613 z[i] += y[i]; 614 } 615 } 616 617 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 618 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 619 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 623 624 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 625 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 626 { 627 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 628 const PetscScalar *x; 629 PetscScalar *y,*z; 630 const MatScalar *aa; 631 PetscErrorCode ierr; 632 PetscInt m = A->rmap->n; 633 PetscInt n = A->cmap->n; 634 const PetscInt *aj,*ai; 635 PetscInt i; 636 637 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 638 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 639 PetscScalar alpha = 1.0; 640 PetscScalar beta; 641 char matdescra[6]; 642 643 PetscFunctionBegin; 644 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 645 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 646 647 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 648 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 649 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 650 aa = a->a; /* Nonzero elements stored row-by-row. */ 651 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 652 653 /* Call MKL sparse BLAS routine to do the MatMult. */ 654 if (zz == yy) { 655 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 656 beta = 1.0; 657 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 658 } else { 659 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 660 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 661 beta = 0.0; 662 mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z); 663 for (i=0; i<n; i++) { 664 z[i] += y[i]; 665 } 666 } 667 668 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 669 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 670 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 #endif 674 675 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 676 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 679 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr; 680 const PetscScalar *x; 681 PetscScalar *y,*z; 682 PetscErrorCode ierr; 683 PetscInt n = A->cmap->n; 684 PetscInt i; 685 PetscObjectState state; 686 687 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 688 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 689 690 PetscFunctionBegin; 691 692 /* If there are no nonzero entries, set zz = yy and return immediately. */ 693 if(!a->nz) { 694 PetscInt i; 695 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 696 for (i=0; i<n; i++) { 697 z[i] = y[i]; 698 } 699 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 704 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 705 706 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 707 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 708 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 709 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 710 if (!aijmkl->sparse_optimized || aijmkl->state != state) { 711 MatSeqAIJMKL_create_mkl_handle(A); 712 } 713 714 /* Call MKL sparse BLAS routine to do the MatMult. */ 715 if (zz == yy) { 716 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 717 * with alpha and beta both set to 1.0. */ 718 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z); 719 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 720 } else { 721 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 722 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 723 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z); 724 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv"); 725 for (i=0; i<n; i++) { 726 z[i] += y[i]; 727 } 728 } 729 730 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 731 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 732 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 736 737 /* -------------------------- MatProduct code -------------------------- */ 738 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 739 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 740 { 741 Mat_SeqAIJMKL *a, *b; 742 sparse_matrix_t csrA, csrB, csrC; 743 PetscInt nrows,ncols; 744 PetscErrorCode ierr; 745 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 746 struct matrix_descr descr_type_gen; 747 PetscObjectState state; 748 749 PetscFunctionBegin; 750 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 751 * not handle sparse matrices with zero rows or columns. */ 752 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 753 else nrows = A->cmap->N; 754 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 755 else ncols = B->rmap->N; 756 757 a = (Mat_SeqAIJMKL*)A->spptr; 758 b = (Mat_SeqAIJMKL*)B->spptr; 759 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 760 if (!a->sparse_optimized || a->state != state) { 761 MatSeqAIJMKL_create_mkl_handle(A); 762 } 763 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 764 if (!b->sparse_optimized || b->state != state) { 765 MatSeqAIJMKL_create_mkl_handle(B); 766 } 767 csrA = a->csrA; 768 csrB = b->csrA; 769 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 770 771 if (csrA && csrB) { 772 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC); 773 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply"); 774 } else { 775 csrC = PETSC_NULL; 776 } 777 778 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr); 779 780 PetscFunctionReturn(0); 781 } 782 783 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C) 784 { 785 Mat_SeqAIJMKL *a, *b, *c; 786 sparse_matrix_t csrA, csrB, csrC; 787 PetscErrorCode ierr; 788 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 789 struct matrix_descr descr_type_gen; 790 PetscObjectState state; 791 792 PetscFunctionBegin; 793 a = (Mat_SeqAIJMKL*)A->spptr; 794 b = (Mat_SeqAIJMKL*)B->spptr; 795 c = (Mat_SeqAIJMKL*)C->spptr; 796 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 797 if (!a->sparse_optimized || a->state != state) { 798 MatSeqAIJMKL_create_mkl_handle(A); 799 } 800 ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr); 801 if (!b->sparse_optimized || b->state != state) { 802 MatSeqAIJMKL_create_mkl_handle(B); 803 } 804 csrA = a->csrA; 805 csrB = b->csrA; 806 csrC = c->csrA; 807 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 808 809 if (csrA && csrB) { 810 stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC); 811 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply"); 812 } else { 813 csrC = PETSC_NULL; 814 } 815 816 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 817 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 818 819 PetscFunctionReturn(0); 820 } 821 822 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 823 { 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 832 { 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 837 PetscFunctionReturn(0); 838 } 839 840 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 841 { 842 PetscErrorCode ierr; 843 844 PetscFunctionBegin; 845 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 849 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 850 { 851 PetscErrorCode ierr; 852 853 PetscFunctionBegin; 854 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C) 859 { 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C) 868 { 869 PetscErrorCode ierr; 870 871 PetscFunctionBegin; 872 ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr); 873 PetscFunctionReturn(0); 874 } 875 876 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 877 { 878 PetscErrorCode ierr; 879 Mat_Product *product = C->product; 880 Mat A = product->A,B = product->B; 881 882 PetscFunctionBegin; 883 ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 888 { 889 PetscErrorCode ierr; 890 Mat_Product *product = C->product; 891 Mat A = product->A,B = product->B; 892 PetscReal fill = product->fill; 893 894 PetscFunctionBegin; 895 ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr); 896 897 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 898 PetscFunctionReturn(0); 899 } 900 901 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat P,Mat C) 902 { 903 Mat Ct; 904 Vec zeros; 905 Mat_SeqAIJMKL *a, *p, *c; 906 sparse_matrix_t csrA, csrP, csrC; 907 PetscBool set, flag; 908 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 909 struct matrix_descr descr_type_sym; 910 PetscObjectState state; 911 PetscErrorCode ierr; 912 913 PetscFunctionBegin; 914 ierr = MatIsSymmetricKnown(A,&set,&flag); 915 if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL() called on matrix A not marked as symmetric"); 916 917 a = (Mat_SeqAIJMKL*)A->spptr; 918 p = (Mat_SeqAIJMKL*)P->spptr; 919 c = (Mat_SeqAIJMKL*)C->spptr; 920 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 921 if (!a->sparse_optimized || a->state != state) { 922 MatSeqAIJMKL_create_mkl_handle(A); 923 } 924 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 925 if (!p->sparse_optimized || p->state != state) { 926 MatSeqAIJMKL_create_mkl_handle(P); 927 } 928 csrA = a->csrA; 929 csrP = p->csrA; 930 csrC = c->csrA; 931 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 932 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 933 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 934 935 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 936 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT); 937 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr"); 938 939 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 940 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 941 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 942 * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY 943 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 944 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 945 * the full matrix. */ 946 ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr); 947 ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr); 948 ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr); 949 ierr = VecSetFromOptions(zeros);CHKERRQ(ierr); 950 ierr = VecZeroEntries(zeros);CHKERRQ(ierr); 951 ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr); 952 ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 953 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 954 ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr); 955 C->product->type = MATPRODUCT_PtAP; 956 ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr); 957 ierr = VecDestroy(&zeros);CHKERRQ(ierr); 958 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 959 960 PetscFunctionReturn(0); 961 } 962 963 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 964 { 965 Mat_Product *product = C->product; 966 Mat A = product->A,P = product->B; 967 Mat_SeqAIJMKL *a,*p; 968 sparse_matrix_t csrA,csrP,csrC; 969 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 970 struct matrix_descr descr_type_sym; 971 PetscObjectState state; 972 PetscErrorCode ierr; 973 974 PetscFunctionBegin; 975 a = (Mat_SeqAIJMKL*)A->spptr; 976 p = (Mat_SeqAIJMKL*)P->spptr; 977 ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr); 978 if (!a->sparse_optimized || a->state != state) { 979 MatSeqAIJMKL_create_mkl_handle(A); 980 } 981 ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr); 982 if (!p->sparse_optimized || p->state != state) { 983 MatSeqAIJMKL_create_mkl_handle(P); 984 } 985 csrA = a->csrA; 986 csrP = p->csrA; 987 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 988 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 989 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 990 991 /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 992 if (csrP && csrA) { 993 stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL); 994 if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr"); 995 } else { 996 csrC = PETSC_NULL; 997 } 998 999 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 1000 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 1001 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(). I believe that leaving things 1002 * in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this is 1003 * guaranteed. */ 1004 ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr); 1005 1006 C->ops->productnumeric = MatProductNumeric_PtAP; 1007 PetscFunctionReturn(0); 1008 } 1009 1010 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 1011 { 1012 PetscFunctionBegin; 1013 C->ops->productsymbolic = MatProductSymbolic_AB; 1014 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1015 PetscFunctionReturn(0); 1016 } 1017 1018 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 1019 { 1020 PetscFunctionBegin; 1021 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 1022 PetscFunctionReturn(0); 1023 } 1024 1025 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 1026 { 1027 PetscFunctionBegin; 1028 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 1029 C->ops->productsymbolic = MatProductSymbolic_ABt; 1030 PetscFunctionReturn(0); 1031 } 1032 1033 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 1034 { 1035 PetscErrorCode ierr; 1036 Mat_Product *product = C->product; 1037 Mat A = product->A; 1038 PetscBool set, flag; 1039 1040 PetscFunctionBegin; 1041 #if defined(PETSC_USE_COMPLEX) 1042 /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used. 1043 * We do this in several other locations in this file. This works for the time being, but the _Basic() 1044 * routines are considered unsafe and may be removed from the MatProduct code in the future. 1045 * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */ 1046 C->ops->productsymbolic = NULL; 1047 #else 1048 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 1049 ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr); 1050 if (set && flag) { 1051 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1052 PetscFunctionReturn(0); 1053 } else { 1054 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1055 } 1056 /* Note that we don't set C->ops->productnumeric here, as this has must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL(), 1057 * depending on whether the algorithm for the general case vs. the real symmetric one is used. */ 1058 #endif 1059 PetscFunctionReturn(0); 1060 } 1061 1062 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 1063 { 1064 PetscFunctionBegin; 1065 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1066 PetscFunctionReturn(0); 1067 } 1068 1069 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 1070 { 1071 PetscFunctionBegin; 1072 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */ 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 1077 { 1078 PetscErrorCode ierr; 1079 Mat_Product *product = C->product; 1080 1081 PetscFunctionBegin; 1082 switch (product->type) { 1083 case MATPRODUCT_AB: 1084 ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr); 1085 break; 1086 case MATPRODUCT_AtB: 1087 ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr); 1088 break; 1089 case MATPRODUCT_ABt: 1090 ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr); 1091 break; 1092 case MATPRODUCT_PtAP: 1093 ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr); 1094 break; 1095 case MATPRODUCT_RARt: 1096 ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr); 1097 break; 1098 case MATPRODUCT_ABC: 1099 ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr); 1100 break; 1101 default: 1102 break; 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 1107 /* ------------------------ End MatProduct code ------------------------ */ 1108 1109 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 1110 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 1111 * routine, but can also be used to convert an assembled SeqAIJ matrix 1112 * into a SeqAIJMKL one. */ 1113 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 1114 { 1115 PetscErrorCode ierr; 1116 Mat B = *newmat; 1117 Mat_SeqAIJMKL *aijmkl; 1118 PetscBool set; 1119 PetscBool sametype; 1120 1121 PetscFunctionBegin; 1122 if (reuse == MAT_INITIAL_MATRIX) { 1123 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1124 } 1125 1126 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 1127 if (sametype) PetscFunctionReturn(0); 1128 1129 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 1130 B->spptr = (void*) aijmkl; 1131 1132 /* Set function pointers for methods that we inherit from AIJ but override. 1133 * We also parse some command line options below, since those determine some of the methods we point to. */ 1134 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 1135 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1136 B->ops->destroy = MatDestroy_SeqAIJMKL; 1137 1138 aijmkl->sparse_optimized = PETSC_FALSE; 1139 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1140 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1141 #else 1142 aijmkl->no_SpMV2 = PETSC_TRUE; 1143 #endif 1144 aijmkl->eager_inspection = PETSC_FALSE; 1145 1146 /* Parse command line options. */ 1147 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 1148 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 1149 ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr); 1150 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1151 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1152 if(!aijmkl->no_SpMV2) { 1153 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 1154 aijmkl->no_SpMV2 = PETSC_TRUE; 1155 } 1156 #endif 1157 1158 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1159 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1160 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1161 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1162 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1163 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1164 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1165 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1166 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1167 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1168 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1169 # if !defined(PETSC_USE_COMPLEX) 1170 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL; 1171 # else 1172 B->ops->ptapnumeric = NULL; 1173 # endif 1174 # endif 1175 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1176 1177 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1178 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1179 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1180 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1181 * versions in which the older interface has not been deprecated, we use the old interface. */ 1182 if (aijmkl->no_SpMV2) { 1183 B->ops->mult = MatMult_SeqAIJMKL; 1184 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1185 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1186 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1187 } 1188 #endif 1189 1190 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 1191 1192 if(!aijmkl->no_SpMV2) { 1193 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1194 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1195 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr); 1196 #endif 1197 #endif 1198 } 1199 1200 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 1201 *newmat = B; 1202 PetscFunctionReturn(0); 1203 } 1204 1205 /*@C 1206 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 1207 This type inherits from AIJ and is largely identical, but uses sparse BLAS 1208 routines from Intel MKL whenever possible. 1209 If the installed version of MKL supports the "SpMV2" sparse 1210 inspector-executor routines, then those are used by default. 1211 MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for 1212 symmetric A) operations are currently supported. 1213 Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric. 1214 1215 Collective 1216 1217 Input Parameters: 1218 + comm - MPI communicator, set to PETSC_COMM_SELF 1219 . m - number of rows 1220 . n - number of columns 1221 . nz - number of nonzeros per row (same for all rows) 1222 - nnz - array containing the number of nonzeros in the various rows 1223 (possibly different for each row) or NULL 1224 1225 Output Parameter: 1226 . A - the matrix 1227 1228 Options Database Keys: 1229 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1230 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied 1231 1232 Notes: 1233 If nnz is given then nz is ignored 1234 1235 Level: intermediate 1236 1237 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 1238 @*/ 1239 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1240 { 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1245 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1246 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 1247 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1252 { 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1257 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260