1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 19 struct matrix_descr descr; 20 } Mat_SeqAIJMKL; 21 22 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 } 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 48 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 49 * simply involves destroying the MKL sparse matrix handle. 50 * We don't free the Mat_SeqAIJMKL struct itself, as this will 51 * cause problems later when MatDestroy() tries to free it. */ 52 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 53 if (aijmkl->sparse_optimized) { 54 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 55 stat = mkl_sparse_destroy(aijmkl->csrA); 56 if (stat != SPARSE_STATUS_SUCCESS) { 57 PetscFunctionReturn(PETSC_ERR_LIB); 58 } 59 } 60 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 61 62 /* Change the type of B to MATSEQAIJ. */ 63 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 64 65 *newmat = B; 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 71 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 72 { 73 PetscErrorCode ierr; 74 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 75 76 PetscFunctionBegin; 77 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 78 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 79 if (aijmkl->sparse_optimized) { 80 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 81 stat = mkl_sparse_destroy(aijmkl->csrA); 82 if (stat != SPARSE_STATUS_SUCCESS) { 83 PetscFunctionReturn(PETSC_ERR_LIB); 84 } 85 } 86 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 87 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 88 89 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 90 * to destroy everything that remains. */ 91 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 92 /* Note that I don't call MatSetType(). I believe this is because that 93 * is only to be called when *building* a matrix. I could be wrong, but 94 * that is how things work for the SuperLU matrix class. */ 95 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 101 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 102 { 103 PetscErrorCode ierr; 104 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 105 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 106 107 PetscFunctionBegin; 108 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 109 ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 115 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 116 { 117 PetscErrorCode ierr; 118 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 119 Mat_SeqAIJMKL *aijmkl; 120 121 MatScalar *aa; 122 PetscInt n; 123 PetscInt *aj,*ai; 124 PetscBool set; 125 126 PetscFunctionBegin; 127 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 128 129 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 130 * extra information and some different methods, call the AssemblyEnd 131 * routine for a MATSEQAIJ. 132 * I'm not sure if this is the best way to do this, but it avoids 133 * a lot of code duplication. 134 * I also note that currently MATSEQAIJMKL doesn't know anything about 135 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 136 * are many zero rows. If the SeqAIJ assembly end routine decides to use 137 * this, this may break things. (Don't know... haven't looked at it. 138 * Do I need to disable this somehow?) */ 139 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 140 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 141 142 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 143 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 144 if (!aijmkl->no_SpMV2) { 145 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 146 /* Now perform the SpMV2 setup and matrix optimization. */ 147 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 148 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 149 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 150 n = A->rmap->n; 151 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 152 aa = a->a; /* Nonzero elements stored row-by-row. */ 153 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 154 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 155 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 156 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 157 stat = mkl_sparse_optimize(aijmkl->csrA); 158 if (stat != SPARSE_STATUS_SUCCESS) { 159 PetscFunctionReturn(PETSC_ERR_LIB); 160 } 161 aijmkl->sparse_optimized = PETSC_TRUE; 162 } 163 #endif 164 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMult_SeqAIJMKL" 170 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 171 { 172 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 173 const PetscScalar *x; 174 PetscScalar *y; 175 const MatScalar *aa; 176 PetscErrorCode ierr; 177 PetscInt m=A->rmap->n; 178 const PetscInt *aj,*ai; 179 PetscInt i; 180 181 /* Variables not in MatMult_SeqAIJ. */ 182 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 183 184 PetscFunctionBegin; 185 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 186 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 187 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 188 aa = a->a; /* Nonzero elements stored row-by-row. */ 189 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 190 191 /* Call MKL sparse BLAS routine to do the MatMult. */ 192 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 193 194 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 195 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 196 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 197 PetscFunctionReturn(0); 198 } 199 200 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 203 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 204 { 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 207 const PetscScalar *x; 208 PetscScalar *y; 209 const MatScalar *aa; 210 PetscErrorCode ierr; 211 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 212 213 PetscFunctionBegin; 214 215 #ifdef DEBUG 216 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 217 #endif 218 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 219 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 220 221 /* Call MKL SpMV2 executor routine to do the MatMult. */ 222 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 223 224 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 225 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 226 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 227 if (stat != SPARSE_STATUS_SUCCESS) { 228 PetscFunctionReturn(PETSC_ERR_LIB); 229 } 230 PetscFunctionReturn(0); 231 } 232 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 236 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 237 { 238 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 239 const PetscScalar *x; 240 PetscScalar *y; 241 const MatScalar *aa; 242 PetscErrorCode ierr; 243 PetscInt m=A->rmap->n; 244 const PetscInt *aj,*ai; 245 PetscInt i; 246 247 /* Variables not in MatMultTranspose_SeqAIJ. */ 248 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 249 250 PetscFunctionBegin; 251 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 252 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 253 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 254 aa = a->a; /* Nonzero elements stored row-by-row. */ 255 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 256 257 /* Call MKL sparse BLAS routine to do the MatMult. */ 258 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 259 260 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 261 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 262 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 267 #undef __FUNCT__ 268 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 269 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 270 { 271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 272 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 273 const PetscScalar *x; 274 PetscScalar *y; 275 const MatScalar *aa; 276 PetscErrorCode ierr; 277 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 278 279 PetscFunctionBegin; 280 281 #ifdef DEBUG 282 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 283 #endif 284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 285 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 286 287 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 288 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 289 290 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 291 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 292 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 293 if (stat != SPARSE_STATUS_SUCCESS) { 294 PetscFunctionReturn(PETSC_ERR_LIB); 295 } 296 PetscFunctionReturn(0); 297 } 298 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 302 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 303 { 304 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 305 const PetscScalar *x; 306 PetscScalar *y,*z; 307 const MatScalar *aa; 308 PetscErrorCode ierr; 309 PetscInt m=A->rmap->n; 310 const PetscInt *aj,*ai; 311 PetscInt i; 312 313 /* Variables not in MatMultAdd_SeqAIJ. */ 314 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 315 PetscScalar alpha = 1.0; 316 PetscScalar beta = 1.0; 317 char matdescra[6]; 318 319 PetscFunctionBegin; 320 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 321 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 322 323 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 324 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 325 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 326 aa = a->a; /* Nonzero elements stored row-by-row. */ 327 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 328 329 /* Call MKL sparse BLAS routine to do the MatMult. */ 330 if (zz == yy) { 331 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 332 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 333 } else { 334 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 335 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 336 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 337 for (i=0; i<m; i++) { 338 z[i] += y[i]; 339 } 340 } 341 342 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 343 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 344 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 351 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 352 { 353 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 354 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 355 const PetscScalar *x; 356 PetscScalar *y,*z; 357 const MatScalar *aa; 358 PetscErrorCode ierr; 359 PetscInt m=A->rmap->n; 360 const PetscInt *aj,*ai; 361 PetscInt i; 362 363 /* Variables not in MatMultAdd_SeqAIJ. */ 364 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 365 366 PetscFunctionBegin; 367 368 #ifdef DEBUG 369 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 370 #endif 371 372 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 373 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 374 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 375 aa = a->a; /* Nonzero elements stored row-by-row. */ 376 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 377 378 /* Call MKL sparse BLAS routine to do the MatMult. */ 379 if (zz == yy) { 380 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 381 * with alpha and beta both set to 1.0. */ 382 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 383 } else { 384 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 385 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 386 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 387 for (i=0; i<m; i++) { 388 z[i] += y[i]; 389 } 390 } 391 392 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 393 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 394 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 395 if (stat != SPARSE_STATUS_SUCCESS) { 396 PetscFunctionReturn(PETSC_ERR_LIB); 397 } 398 PetscFunctionReturn(0); 399 } 400 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 401 402 #undef __FUNCT__ 403 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 404 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 405 { 406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 407 const PetscScalar *x; 408 PetscScalar *y,*z; 409 const MatScalar *aa; 410 PetscErrorCode ierr; 411 PetscInt m=A->rmap->n; 412 const PetscInt *aj,*ai; 413 PetscInt i; 414 415 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 416 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 417 PetscScalar alpha = 1.0; 418 PetscScalar beta = 1.0; 419 char matdescra[6]; 420 421 PetscFunctionBegin; 422 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 423 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 424 425 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 426 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 427 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 428 aa = a->a; /* Nonzero elements stored row-by-row. */ 429 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 430 431 /* Call MKL sparse BLAS routine to do the MatMult. */ 432 if (zz == yy) { 433 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 434 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 435 } else { 436 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 437 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 438 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 439 for (i=0; i<m; i++) { 440 z[i] += y[i]; 441 } 442 } 443 444 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 445 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 446 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 453 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 454 { 455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 456 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 457 const PetscScalar *x; 458 PetscScalar *y,*z; 459 const MatScalar *aa; 460 PetscErrorCode ierr; 461 PetscInt m=A->rmap->n; 462 const PetscInt *aj,*ai; 463 PetscInt i; 464 465 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 466 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 467 468 PetscFunctionBegin; 469 470 #ifdef DEBUG 471 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 472 #endif 473 474 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 475 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 476 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 477 aa = a->a; /* Nonzero elements stored row-by-row. */ 478 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 479 480 /* Call MKL sparse BLAS routine to do the MatMult. */ 481 if (zz == yy) { 482 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 483 * with alpha and beta both set to 1.0. */ 484 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 485 } else { 486 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 487 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 488 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 489 for (i=0; i<m; i++) { 490 z[i] += y[i]; 491 } 492 } 493 494 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 495 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 496 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 497 if (stat != SPARSE_STATUS_SUCCESS) { 498 PetscFunctionReturn(PETSC_ERR_LIB); 499 } 500 PetscFunctionReturn(0); 501 } 502 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 503 504 505 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 506 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 507 * routine, but can also be used to convert an assembled SeqAIJ matrix 508 * into a SeqAIJMKL one. */ 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 511 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 512 { 513 PetscErrorCode ierr; 514 Mat B = *newmat; 515 Mat_SeqAIJMKL *aijmkl; 516 PetscBool set; 517 518 PetscFunctionBegin; 519 if (reuse == MAT_INITIAL_MATRIX) { 520 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 521 } 522 523 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 524 B->spptr = (void*) aijmkl; 525 526 /* Set function pointers for methods that we inherit from AIJ but override. 527 * Currently the transposed operations are not being set because I encounter memory corruption 528 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 529 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 530 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 531 B->ops->destroy = MatDestroy_SeqAIJMKL; 532 533 aijmkl->sparse_optimized = PETSC_FALSE; 534 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 535 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 536 #elif 537 aijmkl->no_SpMV2 = PETSC_TRUE; 538 #endif 539 540 /* Parse command line options. */ 541 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 542 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 543 ierr = PetscOptionsEnd();CHKERRQ(ierr); 544 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 545 if(!aijmkl->no_SpMV2) { 546 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 547 aijmkl->no_SpMV2 = PETSC_TRUE; 548 } 549 #endif 550 551 if(!aijmkl->no_SpMV2) { 552 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 553 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 554 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 555 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 556 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 557 #endif 558 } else { 559 B->ops->mult = MatMult_SeqAIJMKL; 560 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 561 B->ops->multadd = MatMultAdd_SeqAIJMKL; 562 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 563 } 564 565 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 566 567 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 568 *newmat = B; 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatCreateSeqAIJMKL" 574 /*@C 575 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 576 This type inherits from AIJ and is largely identical, but uses sparse BLAS 577 routines from Intel MKL whenever possible. 578 Collective on MPI_Comm 579 580 Input Parameters: 581 + comm - MPI communicator, set to PETSC_COMM_SELF 582 . m - number of rows 583 . n - number of columns 584 . nz - number of nonzeros per row (same for all rows) 585 - nnz - array containing the number of nonzeros in the various rows 586 (possibly different for each row) or NULL 587 588 Output Parameter: 589 . A - the matrix 590 591 Notes: 592 If nnz is given then nz is ignored 593 594 Level: intermediate 595 596 .keywords: matrix, cray, sparse, parallel 597 598 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 599 @*/ 600 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 601 { 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = MatCreate(comm,A);CHKERRQ(ierr); 606 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 607 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 608 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatCreate_SeqAIJMKL" 614 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 615 { 616 PetscErrorCode ierr; 617 618 PetscFunctionBegin; 619 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 620 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623