1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 } Mat_SeqAIJMKL; 21 22 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 } 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 48 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 49 * simply involves destroying the MKL sparse matrix handle. 50 * We don't free the Mat_SeqAIJMKL struct itself, as this will 51 * cause problems later when MatDestroy() tries to free it. */ 52 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 53 if (aijmkl->sparse_optimized) { 54 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 55 stat = mkl_sparse_destroy(aijmkl->csrA); 56 if (stat != SPARSE_STATUS_SUCCESS) { 57 PetscFunctionReturn(PETSC_ERR_LIB); 58 } 59 } 60 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 61 62 /* Change the type of B to MATSEQAIJ. */ 63 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 64 65 *newmat = B; 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 71 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 72 { 73 PetscErrorCode ierr; 74 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 75 76 PetscFunctionBegin; 77 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 78 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 79 if (aijmkl->sparse_optimized) { 80 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 81 stat = mkl_sparse_destroy(aijmkl->csrA); 82 if (stat != SPARSE_STATUS_SUCCESS) { 83 PetscFunctionReturn(PETSC_ERR_LIB); 84 } 85 } 86 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 87 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 88 89 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 90 * to destroy everything that remains. */ 91 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 92 /* Note that I don't call MatSetType(). I believe this is because that 93 * is only to be called when *building* a matrix. I could be wrong, but 94 * that is how things work for the SuperLU matrix class. */ 95 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 101 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 102 { 103 PetscErrorCode ierr; 104 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 105 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 106 107 PetscFunctionBegin; 108 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 109 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 115 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 116 { 117 PetscErrorCode ierr; 118 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 119 Mat_SeqAIJMKL *aijmkl; 120 121 MatScalar *aa; 122 PetscInt n; 123 PetscInt *aj,*ai; 124 125 PetscFunctionBegin; 126 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 127 128 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 129 * extra information and some different methods, call the AssemblyEnd 130 * routine for a MATSEQAIJ. 131 * I'm not sure if this is the best way to do this, but it avoids 132 * a lot of code duplication. 133 * I also note that currently MATSEQAIJMKL doesn't know anything about 134 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 135 * are many zero rows. If the SeqAIJ assembly end routine decides to use 136 * this, this may break things. (Don't know... haven't looked at it. 137 * Do I need to disable this somehow?) */ 138 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 139 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 140 141 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 142 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 143 if (!aijmkl->no_SpMV2) { 144 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 145 /* Now perform the SpMV2 setup and matrix optimization. */ 146 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 147 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 148 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 149 n = A->rmap->n; 150 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 151 aa = a->a; /* Nonzero elements stored row-by-row. */ 152 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 153 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 154 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 155 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 156 stat = mkl_sparse_optimize(aijmkl->csrA); 157 if (stat != SPARSE_STATUS_SUCCESS) { 158 PetscFunctionReturn(PETSC_ERR_LIB); 159 } 160 aijmkl->sparse_optimized = PETSC_TRUE; 161 } 162 #endif 163 164 PetscFunctionReturn(0); 165 } 166 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMult_SeqAIJMKL" 169 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 170 { 171 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 172 const PetscScalar *x; 173 PetscScalar *y; 174 const MatScalar *aa; 175 PetscErrorCode ierr; 176 PetscInt m=A->rmap->n; 177 const PetscInt *aj,*ai; 178 179 /* Variables not in MatMult_SeqAIJ. */ 180 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 181 182 PetscFunctionBegin; 183 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 184 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 185 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 186 aa = a->a; /* Nonzero elements stored row-by-row. */ 187 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 188 189 /* Call MKL sparse BLAS routine to do the MatMult. */ 190 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 191 192 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 193 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 194 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 195 PetscFunctionReturn(0); 196 } 197 198 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 201 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 202 { 203 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 204 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 205 const PetscScalar *x; 206 PetscScalar *y; 207 PetscErrorCode ierr; 208 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 209 210 PetscFunctionBegin; 211 212 #ifdef DEBUG 213 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 214 #endif 215 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 216 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 217 218 /* Call MKL SpMV2 executor routine to do the MatMult. */ 219 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 220 221 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 222 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 223 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 224 if (stat != SPARSE_STATUS_SUCCESS) { 225 PetscFunctionReturn(PETSC_ERR_LIB); 226 } 227 PetscFunctionReturn(0); 228 } 229 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 233 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 234 { 235 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 236 const PetscScalar *x; 237 PetscScalar *y; 238 const MatScalar *aa; 239 PetscErrorCode ierr; 240 PetscInt m=A->rmap->n; 241 const PetscInt *aj,*ai; 242 243 /* Variables not in MatMultTranspose_SeqAIJ. */ 244 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 245 246 PetscFunctionBegin; 247 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 248 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 249 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 250 aa = a->a; /* Nonzero elements stored row-by-row. */ 251 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 252 253 /* Call MKL sparse BLAS routine to do the MatMult. */ 254 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 255 256 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 257 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 258 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 262 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 265 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 268 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 269 const PetscScalar *x; 270 PetscScalar *y; 271 PetscErrorCode ierr; 272 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 273 274 PetscFunctionBegin; 275 276 #ifdef DEBUG 277 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 278 #endif 279 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 280 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 281 282 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 283 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 284 285 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 286 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 287 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 288 if (stat != SPARSE_STATUS_SUCCESS) { 289 PetscFunctionReturn(PETSC_ERR_LIB); 290 } 291 PetscFunctionReturn(0); 292 } 293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 297 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 298 { 299 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 300 const PetscScalar *x; 301 PetscScalar *y,*z; 302 const MatScalar *aa; 303 PetscErrorCode ierr; 304 PetscInt m=A->rmap->n; 305 const PetscInt *aj,*ai; 306 PetscInt i; 307 308 /* Variables not in MatMultAdd_SeqAIJ. */ 309 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 310 PetscScalar alpha = 1.0; 311 PetscScalar beta = 1.0; 312 char matdescra[6]; 313 314 PetscFunctionBegin; 315 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 316 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 317 318 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 319 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 320 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 321 aa = a->a; /* Nonzero elements stored row-by-row. */ 322 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 323 324 /* Call MKL sparse BLAS routine to do the MatMult. */ 325 if (zz == yy) { 326 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 327 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 328 } else { 329 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 330 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 331 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 332 for (i=0; i<m; i++) { 333 z[i] += y[i]; 334 } 335 } 336 337 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 338 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 346 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 347 { 348 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 349 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 350 const PetscScalar *x; 351 PetscScalar *y,*z; 352 PetscErrorCode ierr; 353 PetscInt m=A->rmap->n; 354 PetscInt i; 355 356 /* Variables not in MatMultAdd_SeqAIJ. */ 357 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 358 359 PetscFunctionBegin; 360 361 #ifdef DEBUG 362 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 363 #endif 364 365 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 366 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 367 368 /* Call MKL sparse BLAS routine to do the MatMult. */ 369 if (zz == yy) { 370 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 371 * with alpha and beta both set to 1.0. */ 372 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 373 } else { 374 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 375 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 376 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 377 for (i=0; i<m; i++) { 378 z[i] += y[i]; 379 } 380 } 381 382 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 383 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 384 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 385 if (stat != SPARSE_STATUS_SUCCESS) { 386 PetscFunctionReturn(PETSC_ERR_LIB); 387 } 388 PetscFunctionReturn(0); 389 } 390 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 394 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 395 { 396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 397 const PetscScalar *x; 398 PetscScalar *y,*z; 399 const MatScalar *aa; 400 PetscErrorCode ierr; 401 PetscInt m=A->rmap->n; 402 const PetscInt *aj,*ai; 403 PetscInt i; 404 405 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 406 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 407 PetscScalar alpha = 1.0; 408 PetscScalar beta = 1.0; 409 char matdescra[6]; 410 411 PetscFunctionBegin; 412 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 413 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 414 415 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 416 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 417 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 418 aa = a->a; /* Nonzero elements stored row-by-row. */ 419 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 420 421 /* Call MKL sparse BLAS routine to do the MatMult. */ 422 if (zz == yy) { 423 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 424 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 425 } else { 426 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 427 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 428 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 429 for (i=0; i<m; i++) { 430 z[i] += y[i]; 431 } 432 } 433 434 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 436 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 437 PetscFunctionReturn(0); 438 } 439 440 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 443 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 444 { 445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 446 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 447 const PetscScalar *x; 448 PetscScalar *y,*z; 449 PetscErrorCode ierr; 450 PetscInt m=A->rmap->n; 451 PetscInt i; 452 453 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 454 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 455 456 PetscFunctionBegin; 457 458 #ifdef DEBUG 459 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 460 #endif 461 462 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 463 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 464 465 /* Call MKL sparse BLAS routine to do the MatMult. */ 466 if (zz == yy) { 467 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 468 * with alpha and beta both set to 1.0. */ 469 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 470 } else { 471 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 472 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 473 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 474 for (i=0; i<m; i++) { 475 z[i] += y[i]; 476 } 477 } 478 479 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 481 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 482 if (stat != SPARSE_STATUS_SUCCESS) { 483 PetscFunctionReturn(PETSC_ERR_LIB); 484 } 485 PetscFunctionReturn(0); 486 } 487 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 488 489 490 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 491 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 492 * routine, but can also be used to convert an assembled SeqAIJ matrix 493 * into a SeqAIJMKL one. */ 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 496 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 497 { 498 PetscErrorCode ierr; 499 Mat B = *newmat; 500 Mat_SeqAIJMKL *aijmkl; 501 PetscBool set; 502 503 PetscFunctionBegin; 504 if (reuse == MAT_INITIAL_MATRIX) { 505 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 506 } 507 508 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 509 B->spptr = (void*) aijmkl; 510 511 /* Set function pointers for methods that we inherit from AIJ but override. 512 * Currently the transposed operations are not being set because I encounter memory corruption 513 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 514 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 515 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 516 B->ops->destroy = MatDestroy_SeqAIJMKL; 517 518 aijmkl->sparse_optimized = PETSC_FALSE; 519 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 520 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 521 #elif 522 aijmkl->no_SpMV2 = PETSC_TRUE; 523 #endif 524 525 /* Parse command line options. */ 526 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 527 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 528 ierr = PetscOptionsEnd();CHKERRQ(ierr); 529 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 530 if(!aijmkl->no_SpMV2) { 531 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 532 aijmkl->no_SpMV2 = PETSC_TRUE; 533 } 534 #endif 535 536 if(!aijmkl->no_SpMV2) { 537 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 538 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 539 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 540 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 541 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 542 #endif 543 } else { 544 B->ops->mult = MatMult_SeqAIJMKL; 545 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 546 B->ops->multadd = MatMultAdd_SeqAIJMKL; 547 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 548 } 549 550 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 551 552 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 553 *newmat = B; 554 PetscFunctionReturn(0); 555 } 556 557 #undef __FUNCT__ 558 #define __FUNCT__ "MatCreateSeqAIJMKL" 559 /*@C 560 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 561 This type inherits from AIJ and is largely identical, but uses sparse BLAS 562 routines from Intel MKL whenever possible. 563 Collective on MPI_Comm 564 565 Input Parameters: 566 + comm - MPI communicator, set to PETSC_COMM_SELF 567 . m - number of rows 568 . n - number of columns 569 . nz - number of nonzeros per row (same for all rows) 570 - nnz - array containing the number of nonzeros in the various rows 571 (possibly different for each row) or NULL 572 573 Output Parameter: 574 . A - the matrix 575 576 Notes: 577 If nnz is given then nz is ignored 578 579 Level: intermediate 580 581 .keywords: matrix, cray, sparse, parallel 582 583 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 584 @*/ 585 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 586 { 587 PetscErrorCode ierr; 588 589 PetscFunctionBegin; 590 ierr = MatCreate(comm,A);CHKERRQ(ierr); 591 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 592 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 593 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatCreate_SeqAIJMKL" 599 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 600 { 601 PetscErrorCode ierr; 602 603 PetscFunctionBegin; 604 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 605 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608