1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 29 { 30 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 31 /* so we will ignore 'MatType type'. */ 32 PetscErrorCode ierr; 33 Mat B = *newmat; 34 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) { 38 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 39 } 40 41 /* Reset the original function pointers. */ 42 B->ops->duplicate = MatDuplicate_SeqAIJ; 43 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 44 B->ops->destroy = MatDestroy_SeqAIJ; 45 B->ops->mult = MatMult_SeqAIJ; 46 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 47 B->ops->multadd = MatMultAdd_SeqAIJ; 48 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 49 50 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 51 * simply involves destroying the MKL sparse matrix handle. 52 * We don't free the Mat_SeqAIJMKL struct itself, as this will 53 * cause problems later when MatDestroy() tries to free it. */ 54 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 55 if (aijmkl->sparse_optimized) { 56 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 57 stat = mkl_sparse_destroy(aijmkl->csrA); 58 if (stat != SPARSE_STATUS_SUCCESS) { 59 PetscFunctionReturn(PETSC_ERR_LIB); 60 } 61 } 62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 63 64 /* Change the type of B to MATSEQAIJ. */ 65 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 66 67 *newmat = B; 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 PetscErrorCode ierr; 76 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 77 78 PetscFunctionBegin; 79 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 80 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 81 if (aijmkl->sparse_optimized) { 82 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 83 stat = mkl_sparse_destroy(aijmkl->csrA); 84 if (stat != SPARSE_STATUS_SUCCESS) { 85 PetscFunctionReturn(PETSC_ERR_LIB); 86 } 87 } 88 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 89 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 90 91 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 92 * to destroy everything that remains. */ 93 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 94 /* Note that I don't call MatSetType(). I believe this is because that 95 * is only to be called when *building* a matrix. I could be wrong, but 96 * that is how things work for the SuperLU matrix class. */ 97 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 103 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 104 { 105 PetscErrorCode ierr; 106 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 107 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 108 109 PetscFunctionBegin; 110 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 111 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 117 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 118 { 119 PetscErrorCode ierr; 120 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 121 Mat_SeqAIJMKL *aijmkl; 122 123 MatScalar *aa; 124 PetscInt n; 125 PetscInt *aj,*ai; 126 127 PetscFunctionBegin; 128 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 129 130 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 131 * extra information and some different methods, call the AssemblyEnd 132 * routine for a MATSEQAIJ. 133 * I'm not sure if this is the best way to do this, but it avoids 134 * a lot of code duplication. 135 * I also note that currently MATSEQAIJMKL doesn't know anything about 136 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 137 * are many zero rows. If the SeqAIJ assembly end routine decides to use 138 * this, this may break things. (Don't know... haven't looked at it. 139 * Do I need to disable this somehow?) */ 140 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 141 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 142 143 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 144 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 145 if (!aijmkl->no_SpMV2) { 146 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 147 /* Now perform the SpMV2 setup and matrix optimization. */ 148 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 149 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 150 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 151 n = A->rmap->n; 152 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 153 aa = a->a; /* Nonzero elements stored row-by-row. */ 154 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 155 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 156 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 157 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 158 stat = mkl_sparse_optimize(aijmkl->csrA); 159 if (stat != SPARSE_STATUS_SUCCESS) { 160 PetscFunctionReturn(PETSC_ERR_LIB); 161 } 162 aijmkl->sparse_optimized = PETSC_TRUE; 163 } 164 #endif 165 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatMult_SeqAIJMKL" 171 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 172 { 173 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 174 const PetscScalar *x; 175 PetscScalar *y; 176 const MatScalar *aa; 177 PetscErrorCode ierr; 178 PetscInt m=A->rmap->n; 179 const PetscInt *aj,*ai; 180 181 /* Variables not in MatMult_SeqAIJ. */ 182 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 183 184 PetscFunctionBegin; 185 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 186 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 187 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 188 aa = a->a; /* Nonzero elements stored row-by-row. */ 189 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 190 191 /* Call MKL sparse BLAS routine to do the MatMult. */ 192 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 193 194 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 195 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 196 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 197 PetscFunctionReturn(0); 198 } 199 200 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 203 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 204 { 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 207 const PetscScalar *x; 208 PetscScalar *y; 209 PetscErrorCode ierr; 210 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 211 212 PetscFunctionBegin; 213 214 #ifdef DEBUG 215 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 216 #endif 217 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 218 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 219 220 /* Call MKL SpMV2 executor routine to do the MatMult. */ 221 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 222 223 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 224 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 225 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 226 if (stat != SPARSE_STATUS_SUCCESS) { 227 PetscFunctionReturn(PETSC_ERR_LIB); 228 } 229 PetscFunctionReturn(0); 230 } 231 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 235 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 236 { 237 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 238 const PetscScalar *x; 239 PetscScalar *y; 240 const MatScalar *aa; 241 PetscErrorCode ierr; 242 PetscInt m=A->rmap->n; 243 const PetscInt *aj,*ai; 244 245 /* Variables not in MatMultTranspose_SeqAIJ. */ 246 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 247 248 PetscFunctionBegin; 249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 252 aa = a->a; /* Nonzero elements stored row-by-row. */ 253 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 254 255 /* Call MKL sparse BLAS routine to do the MatMult. */ 256 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 257 258 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 259 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 260 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 267 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 268 { 269 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 270 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 271 const PetscScalar *x; 272 PetscScalar *y; 273 PetscErrorCode ierr; 274 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 275 276 PetscFunctionBegin; 277 278 #ifdef DEBUG 279 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 280 #endif 281 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 282 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 283 284 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 285 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 286 287 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 288 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 290 if (stat != SPARSE_STATUS_SUCCESS) { 291 PetscFunctionReturn(PETSC_ERR_LIB); 292 } 293 PetscFunctionReturn(0); 294 } 295 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 299 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 300 { 301 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 302 const PetscScalar *x; 303 PetscScalar *y,*z; 304 const MatScalar *aa; 305 PetscErrorCode ierr; 306 PetscInt m=A->rmap->n; 307 const PetscInt *aj,*ai; 308 PetscInt i; 309 310 /* Variables not in MatMultAdd_SeqAIJ. */ 311 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 312 PetscScalar alpha = 1.0; 313 PetscScalar beta = 1.0; 314 char matdescra[6]; 315 316 PetscFunctionBegin; 317 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 318 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 319 320 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 321 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 322 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 323 aa = a->a; /* Nonzero elements stored row-by-row. */ 324 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 325 326 /* Call MKL sparse BLAS routine to do the MatMult. */ 327 if (zz == yy) { 328 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 329 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 330 } else { 331 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 332 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 333 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 334 for (i=0; i<m; i++) { 335 z[i] += y[i]; 336 } 337 } 338 339 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 341 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 348 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 349 { 350 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 351 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 352 const PetscScalar *x; 353 PetscScalar *y,*z; 354 PetscErrorCode ierr; 355 PetscInt m=A->rmap->n; 356 PetscInt i; 357 358 /* Variables not in MatMultAdd_SeqAIJ. */ 359 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 360 361 PetscFunctionBegin; 362 363 #ifdef DEBUG 364 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 365 #endif 366 367 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 368 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 369 370 /* Call MKL sparse BLAS routine to do the MatMult. */ 371 if (zz == yy) { 372 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 373 * with alpha and beta both set to 1.0. */ 374 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 375 } else { 376 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 377 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 378 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 379 for (i=0; i<m; i++) { 380 z[i] += y[i]; 381 } 382 } 383 384 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 385 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 386 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 387 if (stat != SPARSE_STATUS_SUCCESS) { 388 PetscFunctionReturn(PETSC_ERR_LIB); 389 } 390 PetscFunctionReturn(0); 391 } 392 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 393 394 #undef __FUNCT__ 395 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 396 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 397 { 398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 399 const PetscScalar *x; 400 PetscScalar *y,*z; 401 const MatScalar *aa; 402 PetscErrorCode ierr; 403 PetscInt m=A->rmap->n; 404 const PetscInt *aj,*ai; 405 PetscInt i; 406 407 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 408 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 409 PetscScalar alpha = 1.0; 410 PetscScalar beta = 1.0; 411 char matdescra[6]; 412 413 PetscFunctionBegin; 414 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 415 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 416 417 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 418 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 419 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 420 aa = a->a; /* Nonzero elements stored row-by-row. */ 421 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 422 423 /* Call MKL sparse BLAS routine to do the MatMult. */ 424 if (zz == yy) { 425 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 426 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 427 } else { 428 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 429 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 430 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 431 for (i=0; i<m; i++) { 432 z[i] += y[i]; 433 } 434 } 435 436 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 438 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 445 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 446 { 447 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 448 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 449 const PetscScalar *x; 450 PetscScalar *y,*z; 451 PetscErrorCode ierr; 452 PetscInt m=A->rmap->n; 453 PetscInt i; 454 455 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 456 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 457 458 PetscFunctionBegin; 459 460 #ifdef DEBUG 461 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 462 #endif 463 464 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 465 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 466 467 /* Call MKL sparse BLAS routine to do the MatMult. */ 468 if (zz == yy) { 469 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 470 * with alpha and beta both set to 1.0. */ 471 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 472 } else { 473 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 474 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 475 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 476 for (i=0; i<m; i++) { 477 z[i] += y[i]; 478 } 479 } 480 481 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 483 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 484 if (stat != SPARSE_STATUS_SUCCESS) { 485 PetscFunctionReturn(PETSC_ERR_LIB); 486 } 487 PetscFunctionReturn(0); 488 } 489 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 490 491 492 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 493 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 494 * routine, but can also be used to convert an assembled SeqAIJ matrix 495 * into a SeqAIJMKL one. */ 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 498 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 499 { 500 PetscErrorCode ierr; 501 Mat B = *newmat; 502 Mat_SeqAIJMKL *aijmkl; 503 PetscBool set; 504 505 PetscFunctionBegin; 506 if (reuse == MAT_INITIAL_MATRIX) { 507 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 508 } 509 510 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 511 B->spptr = (void*) aijmkl; 512 513 /* Set function pointers for methods that we inherit from AIJ but override. 514 * Currently the transposed operations are not being set because I encounter memory corruption 515 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 516 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 517 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 518 B->ops->destroy = MatDestroy_SeqAIJMKL; 519 520 aijmkl->sparse_optimized = PETSC_FALSE; 521 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 522 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 523 #elif 524 aijmkl->no_SpMV2 = PETSC_TRUE; 525 #endif 526 527 /* Parse command line options. */ 528 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 529 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 530 ierr = PetscOptionsEnd();CHKERRQ(ierr); 531 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 532 if(!aijmkl->no_SpMV2) { 533 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 534 aijmkl->no_SpMV2 = PETSC_TRUE; 535 } 536 #endif 537 538 if(!aijmkl->no_SpMV2) { 539 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 540 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 541 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 542 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 543 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 544 #endif 545 } else { 546 B->ops->mult = MatMult_SeqAIJMKL; 547 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 548 B->ops->multadd = MatMultAdd_SeqAIJMKL; 549 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 550 } 551 552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 553 554 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 555 *newmat = B; 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatCreateSeqAIJMKL" 561 /*@C 562 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 563 This type inherits from AIJ and is largely identical, but uses sparse BLAS 564 routines from Intel MKL whenever possible. 565 Collective on MPI_Comm 566 567 Input Parameters: 568 + comm - MPI communicator, set to PETSC_COMM_SELF 569 . m - number of rows 570 . n - number of columns 571 . nz - number of nonzeros per row (same for all rows) 572 - nnz - array containing the number of nonzeros in the various rows 573 (possibly different for each row) or NULL 574 575 Output Parameter: 576 . A - the matrix 577 578 Notes: 579 If nnz is given then nz is ignored 580 581 Level: intermediate 582 583 .keywords: matrix, cray, sparse, parallel 584 585 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 586 @*/ 587 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 588 { 589 PetscErrorCode ierr; 590 591 PetscFunctionBegin; 592 ierr = MatCreate(comm,A);CHKERRQ(ierr); 593 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 594 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 595 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatCreate_SeqAIJMKL" 601 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 602 { 603 PetscErrorCode ierr; 604 605 PetscFunctionBegin; 606 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 607 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610