1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 18 struct matrix_descr descr; 19 } Mat_SeqAIJMKL; 20 21 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 25 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 26 { 27 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 28 /* so we will ignore 'MatType type'. */ 29 PetscErrorCode ierr; 30 Mat B = *newmat; 31 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 32 33 PetscFunctionBegin; 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 36 } 37 38 /* Reset the original function pointers. */ 39 B->ops->duplicate = MatDuplicate_SeqAIJ; 40 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 41 B->ops->destroy = MatDestroy_SeqAIJ; 42 B->ops->mult = MatMult_SeqAIJ; 43 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 44 B->ops->multadd = MatMultAdd_SeqAIJ; 45 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 46 47 /* Free everything in the Mat_SeqAIJMKL data structure. 48 * We don't free the Mat_SeqAIJMKL struct itself, as this will 49 * cause problems later when MatDestroy() tries to free it. */ 50 /* Actually there is nothing to do here right now. 51 * When I've added use of the MKL SpMV2 inspector-executor routines, I should 52 * see if there is some way to clean up the "handle" used by SpMV2. */ 53 54 /* Change the type of B to MATSEQAIJ. */ 55 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 56 57 *newmat = B; 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 63 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 67 68 PetscFunctionBegin; 69 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 70 mkl_sparse_destroy(aijmkl->csrA); 71 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 72 73 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 74 * to destroy everything that remains. */ 75 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 76 /* Note that I don't call MatSetType(). I believe this is because that 77 * is only to be called when *building* a matrix. I could be wrong, but 78 * that is how things work for the SuperLU matrix class. */ 79 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 85 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 86 { 87 PetscErrorCode ierr; 88 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 89 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 90 91 PetscFunctionBegin; 92 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 93 ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 99 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 100 { 101 PetscErrorCode ierr; 102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 103 Mat_SeqAIJMKL *aijmkl; 104 105 MatScalar *aa; 106 PetscInt n; 107 PetscInt *aj,*ai; 108 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 109 PetscBool set; 110 111 PetscFunctionBegin; 112 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 113 114 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 115 * extra information and some different methods, call the AssemblyEnd 116 * routine for a MATSEQAIJ. 117 * I'm not sure if this is the best way to do this, but it avoids 118 * a lot of code duplication. 119 * I also note that currently MATSEQAIJMKL doesn't know anything about 120 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 121 * are many zero rows. If the SeqAIJ assembly end routine decides to use 122 * this, this may break things. (Don't know... haven't looked at it. 123 * Do I need to disable this somehow?) */ 124 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 125 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 126 127 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 128 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 129 if (!aijmkl->no_SpMV2) { 130 /* Now perform the SpMV2 setup and matrix optimization. */ 131 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 132 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 133 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 134 n = A->rmap->n; 135 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 136 aa = a->a; /* Nonzero elements stored row-by-row. */ 137 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 138 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 139 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 140 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 141 stat = mkl_sparse_optimize(aijmkl->csrA); 142 if (stat != SPARSE_STATUS_SUCCESS) { 143 PetscFunctionReturn(PETSC_ERR_LIB); 144 } 145 } 146 #endif 147 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatMult_SeqAIJMKL" 153 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 154 { 155 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 156 const PetscScalar *x; 157 PetscScalar *y; 158 const MatScalar *aa; 159 PetscErrorCode ierr; 160 PetscInt m=A->rmap->n; 161 const PetscInt *aj,*ai; 162 PetscInt i; 163 164 /* Variables not in MatMult_SeqAIJ. */ 165 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 166 167 PetscFunctionBegin; 168 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 169 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 170 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 171 aa = a->a; /* Nonzero elements stored row-by-row. */ 172 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 173 174 /* Call MKL sparse BLAS routine to do the MatMult. */ 175 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 176 177 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 178 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 179 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 180 PetscFunctionReturn(0); 181 } 182 183 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 186 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 187 { 188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 189 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 190 const PetscScalar *x; 191 PetscScalar *y; 192 const MatScalar *aa; 193 PetscErrorCode ierr; 194 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 195 196 PetscFunctionBegin; 197 198 #ifdef DEBUG 199 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 200 #endif 201 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 202 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 203 204 /* Call MKL SpMV2 executor routine to do the MatMult. */ 205 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 206 207 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 208 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 209 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 210 if (stat != SPARSE_STATUS_SUCCESS) { 211 PetscFunctionReturn(PETSC_ERR_LIB); 212 } 213 PetscFunctionReturn(0); 214 } 215 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 219 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 220 { 221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 222 const PetscScalar *x; 223 PetscScalar *y; 224 const MatScalar *aa; 225 PetscErrorCode ierr; 226 PetscInt m=A->rmap->n; 227 const PetscInt *aj,*ai; 228 PetscInt i; 229 230 /* Variables not in MatMultTranspose_SeqAIJ. */ 231 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 232 233 PetscFunctionBegin; 234 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 236 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 237 aa = a->a; /* Nonzero elements stored row-by-row. */ 238 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 239 240 /* Call MKL sparse BLAS routine to do the MatMult. */ 241 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 242 243 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 252 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 253 { 254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 255 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 256 const PetscScalar *x; 257 PetscScalar *y; 258 const MatScalar *aa; 259 PetscErrorCode ierr; 260 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 261 262 PetscFunctionBegin; 263 264 #ifdef DEBUG 265 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 266 #endif 267 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 268 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 269 270 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 271 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 272 273 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 274 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 275 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 276 if (stat != SPARSE_STATUS_SUCCESS) { 277 PetscFunctionReturn(PETSC_ERR_LIB); 278 } 279 PetscFunctionReturn(0); 280 } 281 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 285 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 286 { 287 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 288 const PetscScalar *x; 289 PetscScalar *y,*z; 290 const MatScalar *aa; 291 PetscErrorCode ierr; 292 PetscInt m=A->rmap->n; 293 const PetscInt *aj,*ai; 294 PetscInt i; 295 296 /* Variables not in MatMultAdd_SeqAIJ. */ 297 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 298 PetscScalar alpha = 1.0; 299 PetscScalar beta = 1.0; 300 char matdescra[6]; 301 302 PetscFunctionBegin; 303 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 304 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 305 306 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 307 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 308 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 309 aa = a->a; /* Nonzero elements stored row-by-row. */ 310 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 311 312 /* Call MKL sparse BLAS routine to do the MatMult. */ 313 if (zz == yy) { 314 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 315 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 316 } else { 317 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 318 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 319 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 320 for (i=0; i<m; i++) { 321 z[i] += y[i]; 322 } 323 } 324 325 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 326 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 327 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 334 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 335 { 336 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 337 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 338 const PetscScalar *x; 339 PetscScalar *y,*z; 340 const MatScalar *aa; 341 PetscErrorCode ierr; 342 PetscInt m=A->rmap->n; 343 const PetscInt *aj,*ai; 344 PetscInt i; 345 346 /* Variables not in MatMultAdd_SeqAIJ. */ 347 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 348 349 PetscFunctionBegin; 350 351 #ifdef DEBUG 352 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 353 #endif 354 355 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 356 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 357 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 358 aa = a->a; /* Nonzero elements stored row-by-row. */ 359 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 360 361 /* Call MKL sparse BLAS routine to do the MatMult. */ 362 if (zz == yy) { 363 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 364 * with alpha and beta both set to 1.0. */ 365 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 366 } else { 367 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 368 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 369 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 370 for (i=0; i<m; i++) { 371 z[i] += y[i]; 372 } 373 } 374 375 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 376 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 377 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 378 if (stat != SPARSE_STATUS_SUCCESS) { 379 PetscFunctionReturn(PETSC_ERR_LIB); 380 } 381 PetscFunctionReturn(0); 382 } 383 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 384 385 #undef __FUNCT__ 386 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 387 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 388 { 389 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 390 const PetscScalar *x; 391 PetscScalar *y,*z; 392 const MatScalar *aa; 393 PetscErrorCode ierr; 394 PetscInt m=A->rmap->n; 395 const PetscInt *aj,*ai; 396 PetscInt i; 397 398 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 399 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 400 PetscScalar alpha = 1.0; 401 PetscScalar beta = 1.0; 402 char matdescra[6]; 403 404 PetscFunctionBegin; 405 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 406 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 407 408 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 409 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 410 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 411 aa = a->a; /* Nonzero elements stored row-by-row. */ 412 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 413 414 /* Call MKL sparse BLAS routine to do the MatMult. */ 415 if (zz == yy) { 416 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 417 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 418 } else { 419 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 420 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 421 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 422 for (i=0; i<m; i++) { 423 z[i] += y[i]; 424 } 425 } 426 427 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 428 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 436 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 437 { 438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 439 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 440 const PetscScalar *x; 441 PetscScalar *y,*z; 442 const MatScalar *aa; 443 PetscErrorCode ierr; 444 PetscInt m=A->rmap->n; 445 const PetscInt *aj,*ai; 446 PetscInt i; 447 448 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 449 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 450 451 PetscFunctionBegin; 452 453 #ifdef DEBUG 454 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 455 #endif 456 457 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 458 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 459 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 460 aa = a->a; /* Nonzero elements stored row-by-row. */ 461 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 462 463 /* Call MKL sparse BLAS routine to do the MatMult. */ 464 if (zz == yy) { 465 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 466 * with alpha and beta both set to 1.0. */ 467 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 468 } else { 469 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 470 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 471 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 472 for (i=0; i<m; i++) { 473 z[i] += y[i]; 474 } 475 } 476 477 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 479 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 480 if (stat != SPARSE_STATUS_SUCCESS) { 481 PetscFunctionReturn(PETSC_ERR_LIB); 482 } 483 PetscFunctionReturn(0); 484 } 485 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 486 487 488 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 489 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 490 * routine, but can also be used to convert an assembled SeqAIJ matrix 491 * into a SeqAIJMKL one. */ 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 494 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 495 { 496 PetscErrorCode ierr; 497 Mat B = *newmat; 498 Mat_SeqAIJMKL *aijmkl; 499 PetscBool set; 500 501 PetscFunctionBegin; 502 if (reuse == MAT_INITIAL_MATRIX) { 503 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 504 } 505 506 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 507 B->spptr = (void*) aijmkl; 508 509 /* Set function pointers for methods that we inherit from AIJ but override. 510 * Currently the transposed operations are not being set because I encounter memory corruption 511 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 512 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 513 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 514 B->ops->destroy = MatDestroy_SeqAIJMKL; 515 516 /* Parse command line options. */ 517 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 518 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 519 #elif 520 aijmkl->no_SpMV2 = PETSC_TRUE; 521 #endif 522 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 523 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 524 ierr = PetscOptionsEnd();CHKERRQ(ierr); 525 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 526 if(!aijmkl->no_SpMV2) { 527 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 528 aijmkl->no_SpMV2 = PETSC_TRUE; 529 } 530 #endif 531 532 if(!aijmkl->no_SpMV2) { 533 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 534 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 535 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 536 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 537 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 538 #endif 539 } else { 540 B->ops->mult = MatMult_SeqAIJMKL; 541 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 542 B->ops->multadd = MatMultAdd_SeqAIJMKL; 543 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 544 } 545 546 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 547 548 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 549 *newmat = B; 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "MatCreateSeqAIJMKL" 555 /*@C 556 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 557 This type inherits from AIJ and is largely identical, but uses sparse BLAS 558 routines from Intel MKL whenever possible. 559 Collective on MPI_Comm 560 561 Input Parameters: 562 + comm - MPI communicator, set to PETSC_COMM_SELF 563 . m - number of rows 564 . n - number of columns 565 . nz - number of nonzeros per row (same for all rows) 566 - nnz - array containing the number of nonzeros in the various rows 567 (possibly different for each row) or NULL 568 569 Output Parameter: 570 . A - the matrix 571 572 Notes: 573 If nnz is given then nz is ignored 574 575 Level: intermediate 576 577 .keywords: matrix, cray, sparse, parallel 578 579 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 580 @*/ 581 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 582 { 583 PetscErrorCode ierr; 584 585 PetscFunctionBegin; 586 ierr = MatCreate(comm,A);CHKERRQ(ierr); 587 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 588 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 589 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatCreate_SeqAIJMKL" 595 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 596 { 597 PetscErrorCode ierr; 598 599 PetscFunctionBegin; 600 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 601 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604