1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 } 40 41 /* Reset the original function pointers. */ 42 B->ops->duplicate = MatDuplicate_SeqAIJ; 43 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 44 B->ops->destroy = MatDestroy_SeqAIJ; 45 B->ops->mult = MatMult_SeqAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 47 B->ops->multadd = MatMultAdd_SeqAIJ; 48 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 49 50 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 51 * simply involves destroying the MKL sparse matrix handle. 52 * We don't free the Mat_SeqAIJMKL struct itself, as this will 53 * cause problems later when MatDestroy() tries to free it. */ 54 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 55 if (aijmkl->sparse_optimized) { 56 sparse_status_t stat; 57 stat = mkl_sparse_destroy(aijmkl->csrA); 58 if (stat != SPARSE_STATUS_SUCCESS) { 59 PetscFunctionReturn(PETSC_ERR_LIB); 60 } 61 } 62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 63 64 /* Change the type of B to MATSEQAIJ. */ 65 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 66 67 *newmat = B; 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 PetscErrorCode ierr; 76 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 77 78 PetscFunctionBegin; 79 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 80 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 81 if (aijmkl->sparse_optimized) { 82 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 83 stat = mkl_sparse_destroy(aijmkl->csrA); 84 if (stat != SPARSE_STATUS_SUCCESS) { 85 PetscFunctionReturn(PETSC_ERR_LIB); 86 } 87 } 88 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 89 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 90 91 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 92 * to destroy everything that remains. */ 93 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 94 /* Note that I don't call MatSetType(). I believe this is because that 95 * is only to be called when *building* a matrix. I could be wrong, but 96 * that is how things work for the SuperLU matrix class. */ 97 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 103 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 104 { 105 PetscErrorCode ierr; 106 Mat_SeqAIJMKL *aijmkl; 107 Mat_SeqAIJMKL *aijmkl_dest; 108 109 PetscFunctionBegin; 110 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 111 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 112 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 113 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 114 aijmkl_dest->sparse_optimized = PETSC_FALSE; 115 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 116 aijmkl_dest->csrA = NULL; 117 if (!aijmkl->no_SpMV2) { 118 sparse_status_t stat; 119 stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA); 120 stat = mkl_sparse_optimize(aijmkl_dest->csrA); 121 if (stat != SPARSE_STATUS_SUCCESS) { 122 PetscFunctionReturn(PETSC_ERR_LIB); 123 } 124 aijmkl_dest->sparse_optimized = PETSC_TRUE; 125 } 126 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 132 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 133 { 134 PetscErrorCode ierr; 135 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 136 Mat_SeqAIJMKL *aijmkl; 137 138 MatScalar *aa; 139 PetscInt n; 140 PetscInt *aj,*ai; 141 142 PetscFunctionBegin; 143 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 144 145 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 146 * extra information and some different methods, call the AssemblyEnd 147 * routine for a MATSEQAIJ. 148 * I'm not sure if this is the best way to do this, but it avoids 149 * a lot of code duplication. 150 * I also note that currently MATSEQAIJMKL doesn't know anything about 151 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 152 * are many zero rows. If the SeqAIJ assembly end routine decides to use 153 * this, this may break things. (Don't know... haven't looked at it. 154 * Do I need to disable this somehow?) */ 155 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 156 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 157 158 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 159 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 160 if (!aijmkl->no_SpMV2) { 161 sparse_status_t stat; 162 if (aijmkl->sparse_optimized) { 163 /* Matrix has been previously assembled and optimized. Must destroy old 164 * matrix handle before running the optimization step again. */ 165 sparse_status_t stat; 166 stat = mkl_sparse_destroy(aijmkl->csrA); 167 if (stat != SPARSE_STATUS_SUCCESS) { 168 PetscFunctionReturn(PETSC_ERR_LIB); 169 } 170 } 171 /* Now perform the SpMV2 setup and matrix optimization. */ 172 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 173 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 174 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 175 n = A->rmap->n; 176 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 177 aa = a->a; /* Nonzero elements stored row-by-row. */ 178 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 179 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 180 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 181 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 182 stat = mkl_sparse_optimize(aijmkl->csrA); 183 if (stat != SPARSE_STATUS_SUCCESS) { 184 PetscFunctionReturn(PETSC_ERR_LIB); 185 } 186 aijmkl->sparse_optimized = PETSC_TRUE; 187 } 188 #endif 189 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatMult_SeqAIJMKL" 195 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 196 { 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 const PetscScalar *x; 199 PetscScalar *y; 200 const MatScalar *aa; 201 PetscErrorCode ierr; 202 PetscInt m=A->rmap->n; 203 const PetscInt *aj,*ai; 204 205 /* Variables not in MatMult_SeqAIJ. */ 206 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 207 208 PetscFunctionBegin; 209 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 210 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 211 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 212 aa = a->a; /* Nonzero elements stored row-by-row. */ 213 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 214 215 /* Call MKL sparse BLAS routine to do the MatMult. */ 216 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 217 218 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 219 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 220 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 227 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 228 { 229 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 230 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 231 const PetscScalar *x; 232 PetscScalar *y; 233 PetscErrorCode ierr; 234 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 235 236 PetscFunctionBegin; 237 238 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 239 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 240 241 /* Call MKL SpMV2 executor routine to do the MatMult. */ 242 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 243 244 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 245 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 246 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 247 if (stat != SPARSE_STATUS_SUCCESS) { 248 PetscFunctionReturn(PETSC_ERR_LIB); 249 } 250 PetscFunctionReturn(0); 251 } 252 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 256 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 257 { 258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 259 const PetscScalar *x; 260 PetscScalar *y; 261 const MatScalar *aa; 262 PetscErrorCode ierr; 263 PetscInt m=A->rmap->n; 264 const PetscInt *aj,*ai; 265 266 /* Variables not in MatMultTranspose_SeqAIJ. */ 267 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 268 269 PetscFunctionBegin; 270 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 271 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 272 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 273 aa = a->a; /* Nonzero elements stored row-by-row. */ 274 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 275 276 /* Call MKL sparse BLAS routine to do the MatMult. */ 277 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 278 279 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 280 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 281 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 288 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 289 { 290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 291 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 292 const PetscScalar *x; 293 PetscScalar *y; 294 PetscErrorCode ierr; 295 sparse_status_t stat; 296 297 PetscFunctionBegin; 298 299 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 300 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 301 302 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 303 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 304 305 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 306 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 307 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 308 if (stat != SPARSE_STATUS_SUCCESS) { 309 PetscFunctionReturn(PETSC_ERR_LIB); 310 } 311 PetscFunctionReturn(0); 312 } 313 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 317 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 318 { 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 320 const PetscScalar *x; 321 PetscScalar *y,*z; 322 const MatScalar *aa; 323 PetscErrorCode ierr; 324 PetscInt m=A->rmap->n; 325 const PetscInt *aj,*ai; 326 PetscInt i; 327 328 /* Variables not in MatMultAdd_SeqAIJ. */ 329 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 330 PetscScalar alpha = 1.0; 331 PetscScalar beta = 1.0; 332 char matdescra[6]; 333 334 PetscFunctionBegin; 335 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 336 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 337 338 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 339 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 340 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 341 aa = a->a; /* Nonzero elements stored row-by-row. */ 342 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 343 344 /* Call MKL sparse BLAS routine to do the MatMult. */ 345 if (zz == yy) { 346 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 347 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 348 } else { 349 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 350 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 351 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 352 for (i=0; i<m; i++) { 353 z[i] += y[i]; 354 } 355 } 356 357 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 359 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 366 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 367 { 368 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 369 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 370 const PetscScalar *x; 371 PetscScalar *y,*z; 372 PetscErrorCode ierr; 373 PetscInt m=A->rmap->n; 374 PetscInt i; 375 376 /* Variables not in MatMultAdd_SeqAIJ. */ 377 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 378 379 PetscFunctionBegin; 380 381 382 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 383 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 384 385 /* Call MKL sparse BLAS routine to do the MatMult. */ 386 if (zz == yy) { 387 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 388 * with alpha and beta both set to 1.0. */ 389 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 390 } else { 391 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 392 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 393 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 394 for (i=0; i<m; i++) { 395 z[i] += y[i]; 396 } 397 } 398 399 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 401 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 402 if (stat != SPARSE_STATUS_SUCCESS) { 403 PetscFunctionReturn(PETSC_ERR_LIB); 404 } 405 PetscFunctionReturn(0); 406 } 407 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 411 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 412 { 413 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 414 const PetscScalar *x; 415 PetscScalar *y,*z; 416 const MatScalar *aa; 417 PetscErrorCode ierr; 418 PetscInt m=A->rmap->n; 419 const PetscInt *aj,*ai; 420 PetscInt i; 421 422 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 423 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 424 PetscScalar alpha = 1.0; 425 PetscScalar beta = 1.0; 426 char matdescra[6]; 427 428 PetscFunctionBegin; 429 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 430 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 431 432 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 434 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 435 aa = a->a; /* Nonzero elements stored row-by-row. */ 436 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 437 438 /* Call MKL sparse BLAS routine to do the MatMult. */ 439 if (zz == yy) { 440 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 441 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 442 } else { 443 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 444 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 445 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 446 for (i=0; i<m; i++) { 447 z[i] += y[i]; 448 } 449 } 450 451 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 452 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 453 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 460 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 461 { 462 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 463 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 464 const PetscScalar *x; 465 PetscScalar *y,*z; 466 PetscErrorCode ierr; 467 PetscInt m=A->rmap->n; 468 PetscInt i; 469 470 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 471 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 472 473 PetscFunctionBegin; 474 475 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 476 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 477 478 /* Call MKL sparse BLAS routine to do the MatMult. */ 479 if (zz == yy) { 480 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 481 * with alpha and beta both set to 1.0. */ 482 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 483 } else { 484 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 485 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 486 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 487 for (i=0; i<m; i++) { 488 z[i] += y[i]; 489 } 490 } 491 492 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 495 if (stat != SPARSE_STATUS_SUCCESS) { 496 PetscFunctionReturn(PETSC_ERR_LIB); 497 } 498 PetscFunctionReturn(0); 499 } 500 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 501 502 503 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 504 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 505 * routine, but can also be used to convert an assembled SeqAIJ matrix 506 * into a SeqAIJMKL one. */ 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 509 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 510 { 511 PetscErrorCode ierr; 512 Mat B = *newmat; 513 Mat_SeqAIJMKL *aijmkl; 514 PetscBool set; 515 516 PetscFunctionBegin; 517 if (reuse == MAT_INITIAL_MATRIX) { 518 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 519 } 520 521 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 522 B->spptr = (void*) aijmkl; 523 524 /* Set function pointers for methods that we inherit from AIJ but override. 525 * Currently the transposed operations are not being set because I encounter memory corruption 526 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 527 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 528 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 529 B->ops->destroy = MatDestroy_SeqAIJMKL; 530 531 aijmkl->sparse_optimized = PETSC_FALSE; 532 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 533 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 534 #elif 535 aijmkl->no_SpMV2 = PETSC_TRUE; 536 #endif 537 538 /* Parse command line options. */ 539 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 540 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 541 ierr = PetscOptionsEnd();CHKERRQ(ierr); 542 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 543 if(!aijmkl->no_SpMV2) { 544 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 545 aijmkl->no_SpMV2 = PETSC_TRUE; 546 } 547 #endif 548 549 if(!aijmkl->no_SpMV2) { 550 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 551 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 552 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 553 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 554 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 555 #endif 556 } else { 557 B->ops->mult = MatMult_SeqAIJMKL; 558 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 559 B->ops->multadd = MatMultAdd_SeqAIJMKL; 560 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 561 } 562 563 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 564 565 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 566 *newmat = B; 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatCreateSeqAIJMKL" 572 /*@C 573 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 574 This type inherits from AIJ and is largely identical, but uses sparse BLAS 575 routines from Intel MKL whenever possible. 576 Collective on MPI_Comm 577 578 Input Parameters: 579 + comm - MPI communicator, set to PETSC_COMM_SELF 580 . m - number of rows 581 . n - number of columns 582 . nz - number of nonzeros per row (same for all rows) 583 - nnz - array containing the number of nonzeros in the various rows 584 (possibly different for each row) or NULL 585 586 Output Parameter: 587 . A - the matrix 588 589 Notes: 590 If nnz is given then nz is ignored 591 592 Level: intermediate 593 594 .keywords: matrix, cray, sparse, parallel 595 596 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 597 @*/ 598 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 599 { 600 PetscErrorCode ierr; 601 602 PetscFunctionBegin; 603 ierr = MatCreate(comm,A);CHKERRQ(ierr); 604 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 605 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 606 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 607 PetscFunctionReturn(0); 608 } 609 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatCreate_SeqAIJMKL" 612 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 613 { 614 PetscErrorCode ierr; 615 616 PetscFunctionBegin; 617 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 618 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621