1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 19 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 20 struct matrix_descr descr; 21 #endif 22 } Mat_SeqAIJMKL; 23 24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 25 26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 27 { 28 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 29 /* so we will ignore 'MatType type'. */ 30 PetscErrorCode ierr; 31 Mat B = *newmat; 32 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 33 34 PetscFunctionBegin; 35 if (reuse == MAT_INITIAL_MATRIX) { 36 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 37 aijmkl = (Mat_SeqAIJMKL*)B->spptr; 38 } 39 40 /* Reset the original function pointers. */ 41 B->ops->duplicate = MatDuplicate_SeqAIJ; 42 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 43 B->ops->destroy = MatDestroy_SeqAIJ; 44 B->ops->mult = MatMult_SeqAIJ; 45 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 46 B->ops->multadd = MatMultAdd_SeqAIJ; 47 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 48 49 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr); 50 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 51 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 52 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr); 53 54 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 55 * simply involves destroying the MKL sparse matrix handle and then freeing 56 * the spptr pointer. */ 57 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 58 if (aijmkl->sparse_optimized) { 59 sparse_status_t stat; 60 stat = mkl_sparse_destroy(aijmkl->csrA); 61 if (stat != SPARSE_STATUS_SUCCESS) { 62 PetscFunctionReturn(PETSC_ERR_LIB); 63 } 64 } 65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 66 ierr = PetscFree(B->spptr);CHKERRQ(ierr); 67 68 /* Change the type of B to MATSEQAIJ. */ 69 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 70 71 *newmat = B; 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 76 { 77 PetscErrorCode ierr; 78 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 79 80 PetscFunctionBegin; 81 82 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an 83 * spptr pointer. */ 84 if (aijmkl) { 85 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 86 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 87 if (aijmkl->sparse_optimized) { 88 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 89 stat = mkl_sparse_destroy(aijmkl->csrA); 90 if (stat != SPARSE_STATUS_SUCCESS) { 91 PetscFunctionReturn(PETSC_ERR_LIB); 92 } 93 } 94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 95 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 96 } 97 98 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 99 * to destroy everything that remains. */ 100 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 101 /* Note that I don't call MatSetType(). I believe this is because that 102 * is only to be called when *building* a matrix. I could be wrong, but 103 * that is how things work for the SuperLU matrix class. */ 104 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 109 { 110 PetscErrorCode ierr; 111 Mat_SeqAIJMKL *aijmkl; 112 Mat_SeqAIJMKL *aijmkl_dest; 113 114 PetscFunctionBegin; 115 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 116 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 117 aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 118 ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 119 aijmkl_dest->sparse_optimized = PETSC_FALSE; 120 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 121 aijmkl_dest->csrA = NULL; 122 if (!aijmkl->no_SpMV2 && A->cmap->n > 0) { 123 sparse_status_t stat; 124 stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA); 125 if (stat != SPARSE_STATUS_SUCCESS) { 126 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_copy"); 127 PetscFunctionReturn(PETSC_ERR_LIB); 128 } 129 stat = mkl_sparse_optimize(aijmkl_dest->csrA); 130 if (stat != SPARSE_STATUS_SUCCESS) { 131 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize"); 132 PetscFunctionReturn(PETSC_ERR_LIB); 133 } 134 aijmkl_dest->sparse_optimized = PETSC_TRUE; 135 } 136 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 141 { 142 PetscErrorCode ierr; 143 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 144 Mat_SeqAIJMKL *aijmkl; 145 146 MatScalar *aa; 147 PetscInt m,n; 148 PetscInt *aj,*ai; 149 150 PetscFunctionBegin; 151 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 152 153 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 154 * extra information and some different methods, call the AssemblyEnd 155 * routine for a MATSEQAIJ. 156 * I'm not sure if this is the best way to do this, but it avoids 157 * a lot of code duplication. 158 * I also note that currently MATSEQAIJMKL doesn't know anything about 159 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 160 * are many zero rows. If the SeqAIJ assembly end routine decides to use 161 * this, this may break things. (Don't know... haven't looked at it. 162 * Do I need to disable this somehow?) */ 163 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 164 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 165 166 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 167 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 168 if (!aijmkl->no_SpMV2) { 169 sparse_status_t stat; 170 if (aijmkl->sparse_optimized) { 171 /* Matrix has been previously assembled and optimized. Must destroy old 172 * matrix handle before running the optimization step again. */ 173 sparse_status_t stat; 174 stat = mkl_sparse_destroy(aijmkl->csrA); 175 if (stat != SPARSE_STATUS_SUCCESS) { 176 PetscFunctionReturn(PETSC_ERR_LIB); 177 } 178 } 179 /* Now perform the SpMV2 setup and matrix optimization. */ 180 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 181 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 182 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 183 m = A->rmap->n; 184 n = A->cmap->n; 185 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 186 aa = a->a; /* Nonzero elements stored row-by-row. */ 187 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 188 if (n>0) { 189 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa); 190 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 191 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 192 stat = mkl_sparse_optimize(aijmkl->csrA); 193 if (stat != SPARSE_STATUS_SUCCESS) { 194 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize"); 195 PetscFunctionReturn(PETSC_ERR_LIB); 196 } 197 aijmkl->sparse_optimized = PETSC_TRUE; 198 } 199 } 200 #endif 201 202 PetscFunctionReturn(0); 203 } 204 205 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 206 { 207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 208 const PetscScalar *x; 209 PetscScalar *y; 210 const MatScalar *aa; 211 PetscErrorCode ierr; 212 PetscInt m=A->rmap->n; 213 const PetscInt *aj,*ai; 214 215 /* Variables not in MatMult_SeqAIJ. */ 216 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 217 218 PetscFunctionBegin; 219 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 220 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 221 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 222 aa = a->a; /* Nonzero elements stored row-by-row. */ 223 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 224 225 /* Call MKL sparse BLAS routine to do the MatMult. */ 226 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 227 228 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 229 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 230 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 235 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 236 { 237 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 238 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 239 const PetscScalar *x; 240 PetscScalar *y; 241 PetscErrorCode ierr; 242 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 243 244 PetscFunctionBegin; 245 246 /* If there are no rows, this is a no-op: return immediately. */ 247 if(A->cmap->n < 1) PetscFunctionReturn(0); 248 249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251 252 /* Call MKL SpMV2 executor routine to do the MatMult. */ 253 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 254 255 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 256 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 257 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 258 if (stat != SPARSE_STATUS_SUCCESS) { 259 PetscFunctionReturn(PETSC_ERR_LIB); 260 } 261 PetscFunctionReturn(0); 262 } 263 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 264 265 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 266 { 267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 268 const PetscScalar *x; 269 PetscScalar *y; 270 const MatScalar *aa; 271 PetscErrorCode ierr; 272 PetscInt m=A->rmap->n; 273 const PetscInt *aj,*ai; 274 275 /* Variables not in MatMultTranspose_SeqAIJ. */ 276 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 277 278 PetscFunctionBegin; 279 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 280 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 281 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 282 aa = a->a; /* Nonzero elements stored row-by-row. */ 283 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 284 285 /* Call MKL sparse BLAS routine to do the MatMult. */ 286 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 287 288 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 289 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 290 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 291 PetscFunctionReturn(0); 292 } 293 294 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 295 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 296 { 297 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 298 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 299 const PetscScalar *x; 300 PetscScalar *y; 301 PetscErrorCode ierr; 302 sparse_status_t stat; 303 304 PetscFunctionBegin; 305 306 /* If there are no rows, this is a no-op: return immediately. */ 307 if(A->cmap->n < 1) PetscFunctionReturn(0); 308 309 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 310 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 311 312 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 313 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 314 315 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 316 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 317 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 318 if (stat != SPARSE_STATUS_SUCCESS) { 319 PetscFunctionReturn(PETSC_ERR_LIB); 320 } 321 PetscFunctionReturn(0); 322 } 323 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 324 325 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 326 { 327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 328 const PetscScalar *x; 329 PetscScalar *y,*z; 330 const MatScalar *aa; 331 PetscErrorCode ierr; 332 PetscInt m=A->rmap->n; 333 const PetscInt *aj,*ai; 334 PetscInt i; 335 336 /* Variables not in MatMultAdd_SeqAIJ. */ 337 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 338 PetscScalar alpha = 1.0; 339 PetscScalar beta = 1.0; 340 char matdescra[6]; 341 342 PetscFunctionBegin; 343 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 344 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 345 346 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 347 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 348 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 349 aa = a->a; /* Nonzero elements stored row-by-row. */ 350 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 351 352 /* Call MKL sparse BLAS routine to do the MatMult. */ 353 if (zz == yy) { 354 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 355 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 356 } else { 357 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 358 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 359 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 360 for (i=0; i<m; i++) { 361 z[i] += y[i]; 362 } 363 } 364 365 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 366 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 367 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 372 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 373 { 374 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 375 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 376 const PetscScalar *x; 377 PetscScalar *y,*z; 378 PetscErrorCode ierr; 379 PetscInt m=A->rmap->n; 380 PetscInt i; 381 382 /* Variables not in MatMultAdd_SeqAIJ. */ 383 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 384 385 PetscFunctionBegin; 386 387 /* If there are no rows, this is a no-op: return immediately. */ 388 if(A->cmap->n < 1) PetscFunctionReturn(0); 389 390 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 391 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 392 393 /* Call MKL sparse BLAS routine to do the MatMult. */ 394 if (zz == yy) { 395 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 396 * with alpha and beta both set to 1.0. */ 397 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 398 } else { 399 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 400 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 401 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 402 for (i=0; i<m; i++) { 403 z[i] += y[i]; 404 } 405 } 406 407 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 408 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 409 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 410 if (stat != SPARSE_STATUS_SUCCESS) { 411 PetscFunctionReturn(PETSC_ERR_LIB); 412 } 413 PetscFunctionReturn(0); 414 } 415 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 416 417 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 418 { 419 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 420 const PetscScalar *x; 421 PetscScalar *y,*z; 422 const MatScalar *aa; 423 PetscErrorCode ierr; 424 PetscInt m=A->rmap->n; 425 const PetscInt *aj,*ai; 426 PetscInt i; 427 428 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 429 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 430 PetscScalar alpha = 1.0; 431 PetscScalar beta = 1.0; 432 char matdescra[6]; 433 434 PetscFunctionBegin; 435 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 436 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 437 438 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 439 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 440 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 441 aa = a->a; /* Nonzero elements stored row-by-row. */ 442 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 443 444 /* Call MKL sparse BLAS routine to do the MatMult. */ 445 if (zz == yy) { 446 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 447 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 448 } else { 449 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 450 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 451 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 452 for (i=0; i<m; i++) { 453 z[i] += y[i]; 454 } 455 } 456 457 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 458 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 464 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 465 { 466 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 467 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 468 const PetscScalar *x; 469 PetscScalar *y,*z; 470 PetscErrorCode ierr; 471 PetscInt m=A->rmap->n; 472 PetscInt i; 473 474 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 475 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 476 477 PetscFunctionBegin; 478 479 /* If there are no rows, this is a no-op: return immediately. */ 480 if(A->cmap->n < 1) PetscFunctionReturn(0); 481 482 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 483 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 484 485 /* Call MKL sparse BLAS routine to do the MatMult. */ 486 if (zz == yy) { 487 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 488 * with alpha and beta both set to 1.0. */ 489 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 490 } else { 491 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 492 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 493 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 494 for (i=0; i<m; i++) { 495 z[i] += y[i]; 496 } 497 } 498 499 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 500 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 501 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 502 if (stat != SPARSE_STATUS_SUCCESS) { 503 PetscFunctionReturn(PETSC_ERR_LIB); 504 } 505 PetscFunctionReturn(0); 506 } 507 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 508 509 510 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 511 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 512 * routine, but can also be used to convert an assembled SeqAIJ matrix 513 * into a SeqAIJMKL one. */ 514 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 515 { 516 PetscErrorCode ierr; 517 Mat B = *newmat; 518 Mat_SeqAIJMKL *aijmkl; 519 PetscBool set; 520 PetscBool sametype; 521 522 PetscFunctionBegin; 523 if (reuse == MAT_INITIAL_MATRIX) { 524 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 525 } 526 527 ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr); 528 if (sametype) PetscFunctionReturn(0); 529 530 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 531 B->spptr = (void*) aijmkl; 532 533 /* Set function pointers for methods that we inherit from AIJ but override. 534 * We also parse some command line options below, since those determine some of the methods we point to. 535 * Note: Currently the transposed operations are not being set because I encounter memory corruption 536 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 537 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 538 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 539 B->ops->destroy = MatDestroy_SeqAIJMKL; 540 541 aijmkl->sparse_optimized = PETSC_FALSE; 542 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 543 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 544 #elif 545 aijmkl->no_SpMV2 = PETSC_TRUE; 546 #endif 547 548 /* Parse command line options. */ 549 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 550 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 551 ierr = PetscOptionsEnd();CHKERRQ(ierr); 552 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 553 if(!aijmkl->no_SpMV2) { 554 ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"); 555 aijmkl->no_SpMV2 = PETSC_TRUE; 556 } 557 #endif 558 559 if(!aijmkl->no_SpMV2) { 560 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 561 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 562 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 563 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 564 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 565 #endif 566 } else { 567 B->ops->mult = MatMult_SeqAIJMKL; 568 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 569 B->ops->multadd = MatMultAdd_SeqAIJMKL; 570 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 571 } 572 573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr); 577 578 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 579 *newmat = B; 580 PetscFunctionReturn(0); 581 } 582 583 /*@C 584 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 585 This type inherits from AIJ and is largely identical, but uses sparse BLAS 586 routines from Intel MKL whenever possible. 587 Collective on MPI_Comm 588 589 Input Parameters: 590 + comm - MPI communicator, set to PETSC_COMM_SELF 591 . m - number of rows 592 . n - number of columns 593 . nz - number of nonzeros per row (same for all rows) 594 - nnz - array containing the number of nonzeros in the various rows 595 (possibly different for each row) or NULL 596 597 Output Parameter: 598 . A - the matrix 599 600 Notes: 601 If nnz is given then nz is ignored 602 603 Level: intermediate 604 605 .keywords: matrix, cray, sparse, parallel 606 607 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 608 @*/ 609 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 610 { 611 PetscErrorCode ierr; 612 613 PetscFunctionBegin; 614 ierr = MatCreate(comm,A);CHKERRQ(ierr); 615 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 616 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 617 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } 620 621 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 622 { 623 PetscErrorCode ierr; 624 625 PetscFunctionBegin; 626 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 627 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630