1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 40 } 41 42 /* Reset the original function pointers. */ 43 B->ops->duplicate = MatDuplicate_SeqAIJ; 44 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 45 B->ops->destroy = MatDestroy_SeqAIJ; 46 B->ops->mult = MatMult_SeqAIJ; 47 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 48 B->ops->multadd = MatMultAdd_SeqAIJ; 49 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 50 51 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 52 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 53 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 54 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 55 56 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 57 * simply involves destroying the MKL sparse matrix handle and then freeing 58 * the spptr pointer. */ 59 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 60 if (aijmkl->sparse_optimized) { 61 sparse_status_t stat; 62 stat = mkl_sparse_destroy(aijmkl->csrA); 63 if (stat != SPARSE_STATUS_SUCCESS) { 64 PetscFunctionReturn(PETSC_ERR_LIB); 65 } 66 } 67 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 68 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 69 70 /* Change the type of B to MATSEQAIJ. */ 71 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 72 73 *newmat = B; 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 80 { 81 PetscErrorCode ierr; 82 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 83 84 PetscFunctionBegin; 85 86 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 87 * spptr pointer. */ 88 if (aijmkl) { 89 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 91 if (aijmkl->sparse_optimized) { 92 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 93 stat = mkl_sparse_destroy(aijmkl->csrA); 94 if (stat != SPARSE_STATUS_SUCCESS) { 95 PetscFunctionReturn(PETSC_ERR_LIB); 96 } 97 } 98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 99 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 100 } 101 102 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 103 * to destroy everything that remains. */ 104 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 105 /* Note that I don't call MatSetType(). I believe this is because that 106 * is only to be called when *building* a matrix. I could be wrong, but 107 * that is how things work for the SuperLU matrix class. */ 108 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 #undef __FUNCT__ 113 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 114 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 115 { 116 PetscErrorCode ierr; 117 Mat_SeqAIJMKL *aijmkl; 118 Mat_SeqAIJMKL *aijmkl_dest; 119 120 PetscFunctionBegin; 121 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 122 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 123 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 124 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 125 aijmkl_dest->sparse_optimized = PETSC_FALSE; 126 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 127 aijmkl_dest->csrA = NULL; 128 if (!aijmkl->no_SpMV2) { 129 sparse_status_t stat; 130 stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA); 131 if (stat != SPARSE_STATUS_SUCCESS) { 132 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_copy"); 133 PetscFunctionReturn(PETSC_ERR_LIB); 134 } 135 stat = mkl_sparse_optimize(aijmkl_dest->csrA); 136 if (stat != SPARSE_STATUS_SUCCESS) { 137 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 138 PetscFunctionReturn(PETSC_ERR_LIB); 139 } 140 aijmkl_dest->sparse_optimized = PETSC_TRUE; 141 } 142 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 148 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 149 { 150 PetscErrorCode ierr; 151 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 152 Mat_SeqAIJMKL *aijmkl; 153 154 MatScalar *aa; 155 PetscInt m,n; 156 PetscInt *aj,*ai; 157 158 PetscFunctionBegin; 159 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 160 161 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 162 * extra information and some different methods, call the AssemblyEnd 163 * routine for a MATSEQAIJ. 164 * I'm not sure if this is the best way to do this, but it avoids 165 * a lot of code duplication. 166 * I also note that currently MATSEQAIJMKL doesn't know anything about 167 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 168 * are many zero rows. If the SeqAIJ assembly end routine decides to use 169 * this, this may break things. (Don't know... haven't looked at it. 170 * Do I need to disable this somehow?) */ 171 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 172 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 173 174 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 175 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 176 if (!aijmkl->no_SpMV2) { 177 sparse_status_t stat; 178 if (aijmkl->sparse_optimized) { 179 /* Matrix has been previously assembled and optimized. Must destroy old 180 * matrix handle before running the optimization step again. */ 181 sparse_status_t stat; 182 stat = mkl_sparse_destroy(aijmkl->csrA); 183 if (stat != SPARSE_STATUS_SUCCESS) { 184 PetscFunctionReturn(PETSC_ERR_LIB); 185 } 186 } 187 /* Now perform the SpMV2 setup and matrix optimization. */ 188 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 189 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 190 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 191 m = A->rmap->n; 192 n = A->cmap->n; 193 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 194 aa = a->a; /* Nonzero elements stored row-by-row. */ 195 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 196 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 197 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 198 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 199 stat = mkl_sparse_optimize(aijmkl->csrA); 200 if (stat != SPARSE_STATUS_SUCCESS) { 201 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 202 PetscFunctionReturn(PETSC_ERR_LIB); 203 } 204 aijmkl->sparse_optimized = PETSC_TRUE; 205 } 206 #endif 207 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMult_SeqAIJMKL" 213 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 214 { 215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 216 const PetscScalar *x; 217 PetscScalar *y; 218 const MatScalar *aa; 219 PetscErrorCode ierr; 220 PetscInt m=A->rmap->n; 221 const PetscInt *aj,*ai; 222 223 /* Variables not in MatMult_SeqAIJ. */ 224 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 225 226 PetscFunctionBegin; 227 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 228 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 229 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 230 aa = a->a; /* Nonzero elements stored row-by-row. */ 231 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 232 233 /* Call MKL sparse BLAS routine to do the MatMult. */ 234 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 235 236 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 238 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 245 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 246 { 247 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 248 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 249 const PetscScalar *x; 250 PetscScalar *y; 251 PetscErrorCode ierr; 252 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 253 254 PetscFunctionBegin; 255 256 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 257 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 258 259 /* Call MKL SpMV2 executor routine to do the MatMult. */ 260 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 261 262 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 263 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 264 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 265 if (stat != SPARSE_STATUS_SUCCESS) { 266 PetscFunctionReturn(PETSC_ERR_LIB); 267 } 268 PetscFunctionReturn(0); 269 } 270 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 274 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 275 { 276 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 277 const PetscScalar *x; 278 PetscScalar *y; 279 const MatScalar *aa; 280 PetscErrorCode ierr; 281 PetscInt m=A->rmap->n; 282 const PetscInt *aj,*ai; 283 284 /* Variables not in MatMultTranspose_SeqAIJ. */ 285 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 286 287 PetscFunctionBegin; 288 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 289 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 290 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 291 aa = a->a; /* Nonzero elements stored row-by-row. */ 292 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 293 294 /* Call MKL sparse BLAS routine to do the MatMult. */ 295 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 296 297 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 298 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 299 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 306 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 307 { 308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 309 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 310 const PetscScalar *x; 311 PetscScalar *y; 312 PetscErrorCode ierr; 313 sparse_status_t stat; 314 315 PetscFunctionBegin; 316 317 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 318 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 319 320 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 321 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 322 323 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 324 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 325 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 326 if (stat != SPARSE_STATUS_SUCCESS) { 327 PetscFunctionReturn(PETSC_ERR_LIB); 328 } 329 PetscFunctionReturn(0); 330 } 331 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 335 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 336 { 337 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 338 const PetscScalar *x; 339 PetscScalar *y,*z; 340 const MatScalar *aa; 341 PetscErrorCode ierr; 342 PetscInt m=A->rmap->n; 343 const PetscInt *aj,*ai; 344 PetscInt i; 345 346 /* Variables not in MatMultAdd_SeqAIJ. */ 347 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 348 PetscScalar alpha = 1.0; 349 PetscScalar beta = 1.0; 350 char matdescra[6]; 351 352 PetscFunctionBegin; 353 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 354 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 355 356 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 358 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 359 aa = a->a; /* Nonzero elements stored row-by-row. */ 360 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 361 362 /* Call MKL sparse BLAS routine to do the MatMult. */ 363 if (zz == yy) { 364 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 365 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 366 } else { 367 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 368 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 369 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 370 for (i=0; i<m; i++) { 371 z[i] += y[i]; 372 } 373 } 374 375 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 376 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 377 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 384 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 385 { 386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 387 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 388 const PetscScalar *x; 389 PetscScalar *y,*z; 390 PetscErrorCode ierr; 391 PetscInt m=A->rmap->n; 392 PetscInt i; 393 394 /* Variables not in MatMultAdd_SeqAIJ. */ 395 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 396 397 PetscFunctionBegin; 398 399 400 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 401 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 402 403 /* Call MKL sparse BLAS routine to do the MatMult. */ 404 if (zz == yy) { 405 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 406 * with alpha and beta both set to 1.0. */ 407 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 408 } else { 409 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 410 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 411 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 412 for (i=0; i<m; i++) { 413 z[i] += y[i]; 414 } 415 } 416 417 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 419 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 420 if (stat != SPARSE_STATUS_SUCCESS) { 421 PetscFunctionReturn(PETSC_ERR_LIB); 422 } 423 PetscFunctionReturn(0); 424 } 425 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 429 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 430 { 431 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 432 const PetscScalar *x; 433 PetscScalar *y,*z; 434 const MatScalar *aa; 435 PetscErrorCode ierr; 436 PetscInt m=A->rmap->n; 437 const PetscInt *aj,*ai; 438 PetscInt i; 439 440 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 441 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 442 PetscScalar alpha = 1.0; 443 PetscScalar beta = 1.0; 444 char matdescra[6]; 445 446 PetscFunctionBegin; 447 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 448 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 449 450 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 451 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 452 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 453 aa = a->a; /* Nonzero elements stored row-by-row. */ 454 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 455 456 /* Call MKL sparse BLAS routine to do the MatMult. */ 457 if (zz == yy) { 458 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 459 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 460 } else { 461 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 462 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 463 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 464 for (i=0; i<m; i++) { 465 z[i] += y[i]; 466 } 467 } 468 469 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 470 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 471 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 478 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 481 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 482 const PetscScalar *x; 483 PetscScalar *y,*z; 484 PetscErrorCode ierr; 485 PetscInt m=A->rmap->n; 486 PetscInt i; 487 488 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 489 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 490 491 PetscFunctionBegin; 492 493 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 495 496 /* Call MKL sparse BLAS routine to do the MatMult. */ 497 if (zz == yy) { 498 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 499 * with alpha and beta both set to 1.0. */ 500 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 501 } else { 502 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 503 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 504 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 505 for (i=0; i<m; i++) { 506 z[i] += y[i]; 507 } 508 } 509 510 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 511 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 512 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 513 if (stat != SPARSE_STATUS_SUCCESS) { 514 PetscFunctionReturn(PETSC_ERR_LIB); 515 } 516 PetscFunctionReturn(0); 517 } 518 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 519 520 521 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 522 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 523 * routine, but can also be used to convert an assembled SeqAIJ matrix 524 * into a SeqAIJMKL one. */ 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 527 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 528 { 529 PetscErrorCode ierr; 530 Mat B = *newmat; 531 Mat_SeqAIJMKL *aijmkl; 532 PetscBool set; 533 PetscBool sametype; 534 535 PetscFunctionBegin; 536 if (reuse == MAT_INITIAL_MATRIX) { 537 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 538 } 539 540 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 541 if (sametype) PetscFunctionReturn(0); 542 543 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 544 B->spptr = (void*) aijmkl; 545 546 /* Set function pointers for methods that we inherit from AIJ but override. 547 * We also parse some command line options below, since those determine some of the methods we point to. 548 * Note: Currently the transposed operations are not being set because I encounter memory corruption 549 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 550 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 551 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 552 B->ops->destroy = MatDestroy_SeqAIJMKL; 553 554 aijmkl->sparse_optimized = PETSC_FALSE; 555 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 556 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 557 #elif 558 aijmkl->no_SpMV2 = PETSC_TRUE; 559 #endif 560 561 /* Parse command line options. */ 562 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 563 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 564 ierr = PetscOptionsEnd();CHKERRQ(ierr); 565 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 566 if(!aijmkl->no_SpMV2) { 567 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 568 aijmkl->no_SpMV2 = PETSC_TRUE; 569 } 570 #endif 571 572 if(!aijmkl->no_SpMV2) { 573 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 574 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 575 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 576 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 577 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 578 #endif 579 } else { 580 B->ops->mult = MatMult_SeqAIJMKL; 581 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 582 B->ops->multadd = MatMultAdd_SeqAIJMKL; 583 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 584 } 585 586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 590 591 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 592 *newmat = B; 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatCreateSeqAIJMKL" 598 /*@C 599 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 600 This type inherits from AIJ and is largely identical, but uses sparse BLAS 601 routines from Intel MKL whenever possible. 602 Collective on MPI_Comm 603 604 Input Parameters: 605 + comm - MPI communicator, set to PETSC_COMM_SELF 606 . m - number of rows 607 . n - number of columns 608 . nz - number of nonzeros per row (same for all rows) 609 - nnz - array containing the number of nonzeros in the various rows 610 (possibly different for each row) or NULL 611 612 Output Parameter: 613 . A - the matrix 614 615 Notes: 616 If nnz is given then nz is ignored 617 618 Level: intermediate 619 620 .keywords: matrix, cray, sparse, parallel 621 622 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 623 @*/ 624 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 625 { 626 PetscErrorCode ierr; 627 628 PetscFunctionBegin; 629 ierr = MatCreate(comm,A);CHKERRQ(ierr); 630 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 631 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 632 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 633 PetscFunctionReturn(0); 634 } 635 636 #undef __FUNCT__ 637 #define __FUNCT__ "MatCreate_SeqAIJMKL" 638 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 639 { 640 PetscErrorCode ierr; 641 642 PetscFunctionBegin; 643 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 644 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647