1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 } 40 41 /* Reset the original function pointers. */ 42 B->ops->duplicate = MatDuplicate_SeqAIJ; 43 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 44 B->ops->destroy = MatDestroy_SeqAIJ; 45 B->ops->mult = MatMult_SeqAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 47 B->ops->multadd = MatMultAdd_SeqAIJ; 48 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 49 50 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 51 * simply involves destroying the MKL sparse matrix handle. 52 * We don't free the Mat_SeqAIJMKL struct itself, as this will 53 * cause problems later when MatDestroy() tries to free it. */ 54 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 55 if (aijmkl->sparse_optimized) { 56 sparse_status_t stat; 57 stat = mkl_sparse_destroy(aijmkl->csrA); 58 if (stat != SPARSE_STATUS_SUCCESS) { 59 PetscFunctionReturn(PETSC_ERR_LIB); 60 } 61 } 62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 63 64 /* Change the type of B to MATSEQAIJ. */ 65 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 66 67 *newmat = B; 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 PetscErrorCode ierr; 76 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 77 78 PetscFunctionBegin; 79 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 80 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 81 if (aijmkl->sparse_optimized) { 82 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 83 stat = mkl_sparse_destroy(aijmkl->csrA); 84 if (stat != SPARSE_STATUS_SUCCESS) { 85 PetscFunctionReturn(PETSC_ERR_LIB); 86 } 87 } 88 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 89 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 90 91 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 92 * to destroy everything that remains. */ 93 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 94 /* Note that I don't call MatSetType(). I believe this is because that 95 * is only to be called when *building* a matrix. I could be wrong, but 96 * that is how things work for the SuperLU matrix class. */ 97 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 103 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 104 { 105 PetscErrorCode ierr; 106 Mat_SeqAIJMKL *aijmkl; 107 Mat_SeqAIJMKL *aijmkl_dest; 108 109 PetscFunctionBegin; 110 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 111 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 112 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 113 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 114 aijmkl_dest->sparse_optimized = PETSC_FALSE; 115 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 116 aijmkl_dest->csrA = NULL; 117 if (!aijmkl->no_SpMV2) { 118 sparse_status_t stat; 119 stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA); 120 stat = mkl_sparse_optimize(aijmkl_dest->csrA); 121 if (stat != SPARSE_STATUS_SUCCESS) { 122 PetscFunctionReturn(PETSC_ERR_LIB); 123 } 124 aijmkl_dest->sparse_optimized = PETSC_TRUE; 125 } 126 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 132 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 133 { 134 PetscErrorCode ierr; 135 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 136 Mat_SeqAIJMKL *aijmkl; 137 138 MatScalar *aa; 139 PetscInt n; 140 PetscInt *aj,*ai; 141 142 PetscFunctionBegin; 143 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 144 145 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 146 * extra information and some different methods, call the AssemblyEnd 147 * routine for a MATSEQAIJ. 148 * I'm not sure if this is the best way to do this, but it avoids 149 * a lot of code duplication. 150 * I also note that currently MATSEQAIJMKL doesn't know anything about 151 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 152 * are many zero rows. If the SeqAIJ assembly end routine decides to use 153 * this, this may break things. (Don't know... haven't looked at it. 154 * Do I need to disable this somehow?) */ 155 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 156 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 157 158 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 159 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 160 if (!aijmkl->no_SpMV2) { 161 sparse_status_t stat; 162 if (aijmkl->sparse_optimized) { 163 /* Matrix has been previously assembled and optimized. Must destroy old 164 * matrix handle before running the optimization step again. */ 165 sparse_status_t stat; 166 stat = mkl_sparse_destroy(aijmkl->csrA); 167 if (stat != SPARSE_STATUS_SUCCESS) { 168 PetscFunctionReturn(PETSC_ERR_LIB); 169 } 170 } 171 /* Now perform the SpMV2 setup and matrix optimization. */ 172 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 173 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 174 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 175 n = A->rmap->n; 176 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 177 aa = a->a; /* Nonzero elements stored row-by-row. */ 178 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 179 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 180 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 181 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 182 stat = mkl_sparse_optimize(aijmkl->csrA); 183 if (stat != SPARSE_STATUS_SUCCESS) { 184 PetscFunctionReturn(PETSC_ERR_LIB); 185 } 186 aijmkl->sparse_optimized = PETSC_TRUE; 187 } 188 #endif 189 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatMult_SeqAIJMKL" 195 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 196 { 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 const PetscScalar *x; 199 PetscScalar *y; 200 const MatScalar *aa; 201 PetscErrorCode ierr; 202 PetscInt m=A->rmap->n; 203 const PetscInt *aj,*ai; 204 205 /* Variables not in MatMult_SeqAIJ. */ 206 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 207 208 PetscFunctionBegin; 209 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 210 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 211 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 212 aa = a->a; /* Nonzero elements stored row-by-row. */ 213 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 214 215 /* Call MKL sparse BLAS routine to do the MatMult. */ 216 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 217 218 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 219 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 220 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 227 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 228 { 229 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 230 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 231 const PetscScalar *x; 232 PetscScalar *y; 233 PetscErrorCode ierr; 234 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 235 236 PetscFunctionBegin; 237 238 #ifdef DEBUG 239 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 240 #endif 241 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 243 244 /* Call MKL SpMV2 executor routine to do the MatMult. */ 245 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 246 247 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 248 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 249 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 250 if (stat != SPARSE_STATUS_SUCCESS) { 251 PetscFunctionReturn(PETSC_ERR_LIB); 252 } 253 PetscFunctionReturn(0); 254 } 255 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 256 257 #undef __FUNCT__ 258 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 259 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 260 { 261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 262 const PetscScalar *x; 263 PetscScalar *y; 264 const MatScalar *aa; 265 PetscErrorCode ierr; 266 PetscInt m=A->rmap->n; 267 const PetscInt *aj,*ai; 268 269 /* Variables not in MatMultTranspose_SeqAIJ. */ 270 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 271 272 PetscFunctionBegin; 273 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 274 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 275 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 276 aa = a->a; /* Nonzero elements stored row-by-row. */ 277 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 278 279 /* Call MKL sparse BLAS routine to do the MatMult. */ 280 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 281 282 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 283 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 284 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 289 #undef __FUNCT__ 290 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 291 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 292 { 293 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 294 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 295 const PetscScalar *x; 296 PetscScalar *y; 297 PetscErrorCode ierr; 298 sparse_status_t stat; 299 300 PetscFunctionBegin; 301 302 #ifdef DEBUG 303 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 304 #endif 305 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 306 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 307 308 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 309 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 310 311 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 312 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 313 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 314 if (stat != SPARSE_STATUS_SUCCESS) { 315 PetscFunctionReturn(PETSC_ERR_LIB); 316 } 317 PetscFunctionReturn(0); 318 } 319 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 323 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 324 { 325 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 326 const PetscScalar *x; 327 PetscScalar *y,*z; 328 const MatScalar *aa; 329 PetscErrorCode ierr; 330 PetscInt m=A->rmap->n; 331 const PetscInt *aj,*ai; 332 PetscInt i; 333 334 /* Variables not in MatMultAdd_SeqAIJ. */ 335 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 336 PetscScalar alpha = 1.0; 337 PetscScalar beta = 1.0; 338 char matdescra[6]; 339 340 PetscFunctionBegin; 341 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 342 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 343 344 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 345 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 346 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 347 aa = a->a; /* Nonzero elements stored row-by-row. */ 348 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 349 350 /* Call MKL sparse BLAS routine to do the MatMult. */ 351 if (zz == yy) { 352 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 353 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 354 } else { 355 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 356 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 357 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 358 for (i=0; i<m; i++) { 359 z[i] += y[i]; 360 } 361 } 362 363 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 364 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 365 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 370 #undef __FUNCT__ 371 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 372 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 373 { 374 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 375 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 376 const PetscScalar *x; 377 PetscScalar *y,*z; 378 PetscErrorCode ierr; 379 PetscInt m=A->rmap->n; 380 PetscInt i; 381 382 /* Variables not in MatMultAdd_SeqAIJ. */ 383 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 384 385 PetscFunctionBegin; 386 387 #ifdef DEBUG 388 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 389 #endif 390 391 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 392 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 393 394 /* Call MKL sparse BLAS routine to do the MatMult. */ 395 if (zz == yy) { 396 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 397 * with alpha and beta both set to 1.0. */ 398 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 399 } else { 400 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 401 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 402 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 403 for (i=0; i<m; i++) { 404 z[i] += y[i]; 405 } 406 } 407 408 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 410 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 411 if (stat != SPARSE_STATUS_SUCCESS) { 412 PetscFunctionReturn(PETSC_ERR_LIB); 413 } 414 PetscFunctionReturn(0); 415 } 416 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 420 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 421 { 422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 423 const PetscScalar *x; 424 PetscScalar *y,*z; 425 const MatScalar *aa; 426 PetscErrorCode ierr; 427 PetscInt m=A->rmap->n; 428 const PetscInt *aj,*ai; 429 PetscInt i; 430 431 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 432 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 433 PetscScalar alpha = 1.0; 434 PetscScalar beta = 1.0; 435 char matdescra[6]; 436 437 PetscFunctionBegin; 438 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 439 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 440 441 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 442 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 443 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 444 aa = a->a; /* Nonzero elements stored row-by-row. */ 445 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 446 447 /* Call MKL sparse BLAS routine to do the MatMult. */ 448 if (zz == yy) { 449 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 450 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 451 } else { 452 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 453 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 454 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 455 for (i=0; i<m; i++) { 456 z[i] += y[i]; 457 } 458 } 459 460 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 461 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 462 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 469 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 470 { 471 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 472 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 473 const PetscScalar *x; 474 PetscScalar *y,*z; 475 PetscErrorCode ierr; 476 PetscInt m=A->rmap->n; 477 PetscInt i; 478 479 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 480 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 481 482 PetscFunctionBegin; 483 484 #ifdef DEBUG 485 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 486 #endif 487 488 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 489 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 490 491 /* Call MKL sparse BLAS routine to do the MatMult. */ 492 if (zz == yy) { 493 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 494 * with alpha and beta both set to 1.0. */ 495 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 496 } else { 497 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 498 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 499 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 500 for (i=0; i<m; i++) { 501 z[i] += y[i]; 502 } 503 } 504 505 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 507 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 508 if (stat != SPARSE_STATUS_SUCCESS) { 509 PetscFunctionReturn(PETSC_ERR_LIB); 510 } 511 PetscFunctionReturn(0); 512 } 513 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 514 515 516 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 517 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 518 * routine, but can also be used to convert an assembled SeqAIJ matrix 519 * into a SeqAIJMKL one. */ 520 #undef __FUNCT__ 521 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 522 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 523 { 524 PetscErrorCode ierr; 525 Mat B = *newmat; 526 Mat_SeqAIJMKL *aijmkl; 527 PetscBool set; 528 529 PetscFunctionBegin; 530 if (reuse == MAT_INITIAL_MATRIX) { 531 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 532 } 533 534 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 535 B->spptr = (void*) aijmkl; 536 537 /* Set function pointers for methods that we inherit from AIJ but override. 538 * Currently the transposed operations are not being set because I encounter memory corruption 539 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 540 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 541 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 542 B->ops->destroy = MatDestroy_SeqAIJMKL; 543 544 aijmkl->sparse_optimized = PETSC_FALSE; 545 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 546 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 547 #elif 548 aijmkl->no_SpMV2 = PETSC_TRUE; 549 #endif 550 551 /* Parse command line options. */ 552 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 553 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 554 ierr = PetscOptionsEnd();CHKERRQ(ierr); 555 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 556 if(!aijmkl->no_SpMV2) { 557 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 558 aijmkl->no_SpMV2 = PETSC_TRUE; 559 } 560 #endif 561 562 if(!aijmkl->no_SpMV2) { 563 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 564 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 565 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 566 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 567 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 568 #endif 569 } else { 570 B->ops->mult = MatMult_SeqAIJMKL; 571 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 572 B->ops->multadd = MatMultAdd_SeqAIJMKL; 573 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 574 } 575 576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 577 578 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 579 *newmat = B; 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatCreateSeqAIJMKL" 585 /*@C 586 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 587 This type inherits from AIJ and is largely identical, but uses sparse BLAS 588 routines from Intel MKL whenever possible. 589 Collective on MPI_Comm 590 591 Input Parameters: 592 + comm - MPI communicator, set to PETSC_COMM_SELF 593 . m - number of rows 594 . n - number of columns 595 . nz - number of nonzeros per row (same for all rows) 596 - nnz - array containing the number of nonzeros in the various rows 597 (possibly different for each row) or NULL 598 599 Output Parameter: 600 . A - the matrix 601 602 Notes: 603 If nnz is given then nz is ignored 604 605 Level: intermediate 606 607 .keywords: matrix, cray, sparse, parallel 608 609 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 610 @*/ 611 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 612 { 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 ierr = MatCreate(comm,A);CHKERRQ(ierr); 617 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 618 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 619 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatCreate_SeqAIJMKL" 625 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 626 { 627 PetscErrorCode ierr; 628 629 PetscFunctionBegin; 630 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 631 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634