1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 #define USE_MKL_SPMV2 1 13 /* TODO: Eventually fix the above--I shouldn't hard code things like this! 14 * Use of MKL SpMV2 should eventually be determined at configure time, run time, or 15 * it should just always be used -- not sure what makes sense yet! --RTM */ 16 17 /* MKL include files. */ 18 #include <mkl_spblas.h> /* Sparse BLAS */ 19 20 typedef struct { 21 PetscBool use_SpMV2; /* If PETSC_TRUE, then use the MKL SpMV2 inspector-executor routines. */ 22 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 23 struct matrix_descr descr; 24 } Mat_SeqAIJMKL; 25 26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 30 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 31 { 32 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 33 /* so we will ignore 'MatType type'. */ 34 PetscErrorCode ierr; 35 Mat B = *newmat; 36 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 37 38 PetscFunctionBegin; 39 if (reuse == MAT_INITIAL_MATRIX) { 40 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 41 } 42 43 /* Reset the original function pointers. */ 44 B->ops->duplicate = MatDuplicate_SeqAIJ; 45 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 46 B->ops->destroy = MatDestroy_SeqAIJ; 47 B->ops->mult = MatMult_SeqAIJ; 48 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 49 B->ops->multadd = MatMultAdd_SeqAIJ; 50 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 51 52 /* Free everything in the Mat_SeqAIJMKL data structure. 53 * We don't free the Mat_SeqAIJMKL struct itself, as this will 54 * cause problems later when MatDestroy() tries to free it. */ 55 /* Actually there is nothing to do here right now. 56 * When I've added use of the MKL SpMV2 inspector-executor routines, I should 57 * see if there is some way to clean up the "handle" used by SpMV2. */ 58 59 /* Change the type of B to MATSEQAIJ. */ 60 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 61 62 *newmat = B; 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 68 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 69 { 70 PetscErrorCode ierr; 71 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 72 73 PetscFunctionBegin; 74 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 75 mkl_sparse_destroy(aijmkl->csrA); 76 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 77 78 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 79 * to destroy everything that remains. */ 80 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 81 /* Note that I don't call MatSetType(). I believe this is because that 82 * is only to be called when *building* a matrix. I could be wrong, but 83 * that is how things work for the SuperLU matrix class. */ 84 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 90 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 94 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 95 96 PetscFunctionBegin; 97 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 98 ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 104 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 105 { 106 PetscErrorCode ierr; 107 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 108 Mat_SeqAIJMKL *aijmkl; 109 110 MatScalar *aa; 111 PetscInt n; 112 PetscInt *aj,*ai; 113 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 114 115 PetscFunctionBegin; 116 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 117 118 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 119 * extra information and some different methods, call the AssemblyEnd 120 * routine for a MATSEQAIJ. 121 * I'm not sure if this is the best way to do this, but it avoids 122 * a lot of code duplication. 123 * I also note that currently MATSEQAIJMKL doesn't know anything about 124 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 125 * are many zero rows. If the SeqAIJ assembly end routine decides to use 126 * this, this may break things. (Don't know... haven't looked at it. 127 * Do I need to disable this somehow?) */ 128 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 129 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 130 131 #ifdef USE_MKL_SPMV2 132 /* Now perform the SpMV2 setup and matrix optimization. */ 133 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 134 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 135 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 136 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 137 n = A->rmap->n; 138 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 139 aa = a->a; /* Nonzero elements stored row-by-row. */ 140 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 141 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 142 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 143 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 144 stat = mkl_sparse_optimize(aijmkl->csrA); 145 if (stat != SPARSE_STATUS_SUCCESS) { 146 PetscFunctionReturn(PETSC_ERR_LIB); 147 } 148 #endif /* USE_MKL_SPMV2 */ 149 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatMult_SeqAIJMKL" 155 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 156 { 157 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 158 const PetscScalar *x; 159 PetscScalar *y; 160 const MatScalar *aa; 161 PetscErrorCode ierr; 162 PetscInt m=A->rmap->n; 163 const PetscInt *aj,*ai; 164 PetscInt i; 165 166 /* Variables not in MatMult_SeqAIJ. */ 167 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 168 169 PetscFunctionBegin; 170 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 171 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 172 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 173 aa = a->a; /* Nonzero elements stored row-by-row. */ 174 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 175 176 /* Call MKL sparse BLAS routine to do the MatMult. */ 177 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 178 179 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 180 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 181 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 187 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 188 { 189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 190 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 191 const PetscScalar *x; 192 PetscScalar *y; 193 const MatScalar *aa; 194 PetscErrorCode ierr; 195 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 196 197 PetscFunctionBegin; 198 199 #ifdef DEBUG 200 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 201 #endif 202 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 203 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 204 205 /* Call MKL SpMV2 executor routine to do the MatMult. */ 206 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 207 208 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 209 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 211 if (stat != SPARSE_STATUS_SUCCESS) { 212 PetscFunctionReturn(PETSC_ERR_LIB); 213 } 214 PetscFunctionReturn(0); 215 } 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 219 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 220 { 221 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 222 const PetscScalar *x; 223 PetscScalar *y; 224 const MatScalar *aa; 225 PetscErrorCode ierr; 226 PetscInt m=A->rmap->n; 227 const PetscInt *aj,*ai; 228 PetscInt i; 229 230 /* Variables not in MatMultTranspose_SeqAIJ. */ 231 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 232 233 PetscFunctionBegin; 234 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 236 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 237 aa = a->a; /* Nonzero elements stored row-by-row. */ 238 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 239 240 /* Call MKL sparse BLAS routine to do the MatMult. */ 241 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 242 243 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 244 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 245 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 251 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 252 { 253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 254 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 255 const PetscScalar *x; 256 PetscScalar *y; 257 const MatScalar *aa; 258 PetscErrorCode ierr; 259 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 260 261 PetscFunctionBegin; 262 263 #ifdef DEBUG 264 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 265 #endif 266 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 267 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 268 269 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 270 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 271 272 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 273 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 274 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 275 if (stat != SPARSE_STATUS_SUCCESS) { 276 PetscFunctionReturn(PETSC_ERR_LIB); 277 } 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 283 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 284 { 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 286 const PetscScalar *x; 287 PetscScalar *y,*z; 288 const MatScalar *aa; 289 PetscErrorCode ierr; 290 PetscInt m=A->rmap->n; 291 const PetscInt *aj,*ai; 292 PetscInt i; 293 294 /* Variables not in MatMultAdd_SeqAIJ. */ 295 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 296 PetscScalar alpha = 1.0; 297 PetscScalar beta = 1.0; 298 char matdescra[6]; 299 300 PetscFunctionBegin; 301 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 302 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 303 304 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 305 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 306 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 307 aa = a->a; /* Nonzero elements stored row-by-row. */ 308 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 309 310 /* Call MKL sparse BLAS routine to do the MatMult. */ 311 if (zz == yy) { 312 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 313 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 314 } else { 315 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 316 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 317 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 318 for (i=0; i<m; i++) { 319 z[i] += y[i]; 320 } 321 } 322 323 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 324 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 325 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 331 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 332 { 333 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 334 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 335 const PetscScalar *x; 336 PetscScalar *y,*z; 337 const MatScalar *aa; 338 PetscErrorCode ierr; 339 PetscInt m=A->rmap->n; 340 const PetscInt *aj,*ai; 341 PetscInt i; 342 343 /* Variables not in MatMultAdd_SeqAIJ. */ 344 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 345 346 PetscFunctionBegin; 347 348 #ifdef DEBUG 349 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 350 #endif 351 352 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 353 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 354 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 355 aa = a->a; /* Nonzero elements stored row-by-row. */ 356 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 357 358 /* Call MKL sparse BLAS routine to do the MatMult. */ 359 if (zz == yy) { 360 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 361 * with alpha and beta both set to 1.0. */ 362 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 363 } else { 364 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 365 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 366 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 367 for (i=0; i<m; i++) { 368 z[i] += y[i]; 369 } 370 } 371 372 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 373 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 374 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 375 if (stat != SPARSE_STATUS_SUCCESS) { 376 PetscFunctionReturn(PETSC_ERR_LIB); 377 } 378 PetscFunctionReturn(0); 379 } 380 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 383 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 384 { 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 386 const PetscScalar *x; 387 PetscScalar *y,*z; 388 const MatScalar *aa; 389 PetscErrorCode ierr; 390 PetscInt m=A->rmap->n; 391 const PetscInt *aj,*ai; 392 PetscInt i; 393 394 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 395 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 396 PetscScalar alpha = 1.0; 397 PetscScalar beta = 1.0; 398 char matdescra[6]; 399 400 PetscFunctionBegin; 401 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 402 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 403 404 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 406 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 407 aa = a->a; /* Nonzero elements stored row-by-row. */ 408 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 409 410 /* Call MKL sparse BLAS routine to do the MatMult. */ 411 if (zz == yy) { 412 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 413 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 414 } else { 415 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 416 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 417 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 418 for (i=0; i<m; i++) { 419 z[i] += y[i]; 420 } 421 } 422 423 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 431 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 432 { 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 434 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 435 const PetscScalar *x; 436 PetscScalar *y,*z; 437 const MatScalar *aa; 438 PetscErrorCode ierr; 439 PetscInt m=A->rmap->n; 440 const PetscInt *aj,*ai; 441 PetscInt i; 442 443 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 444 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 445 446 PetscFunctionBegin; 447 448 #ifdef DEBUG 449 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 450 #endif 451 452 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 453 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 454 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 455 aa = a->a; /* Nonzero elements stored row-by-row. */ 456 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 457 458 /* Call MKL sparse BLAS routine to do the MatMult. */ 459 if (zz == yy) { 460 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 461 * with alpha and beta both set to 1.0. */ 462 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 463 } else { 464 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 465 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 466 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 467 for (i=0; i<m; i++) { 468 z[i] += y[i]; 469 } 470 } 471 472 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 474 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 475 if (stat != SPARSE_STATUS_SUCCESS) { 476 PetscFunctionReturn(PETSC_ERR_LIB); 477 } 478 PetscFunctionReturn(0); 479 } 480 481 482 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 483 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 484 * routine, but can also be used to convert an assembled SeqAIJ matrix 485 * into a SeqAIJMKL one. */ 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 488 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 489 { 490 PetscErrorCode ierr; 491 Mat B = *newmat; 492 Mat_SeqAIJMKL *aijmkl; 493 494 PetscFunctionBegin; 495 if (reuse == MAT_INITIAL_MATRIX) { 496 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 497 } 498 499 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 500 B->spptr = (void*) aijmkl; 501 502 /* Set function pointers for methods that we inherit from AIJ but override. 503 * Currently the transposed operations are not being set because I encounter memory corruption 504 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 505 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 506 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 507 B->ops->destroy = MatDestroy_SeqAIJMKL; 508 #ifdef USE_MKL_SPMV2 509 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 510 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 511 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 512 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 513 #else 514 B->ops->mult = MatMult_SeqAIJMKL; 515 // B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 516 B->ops->multadd = MatMultAdd_SeqAIJMKL; 517 // B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 518 #endif /* USE_MKL_SPMV2 */ 519 520 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 521 522 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 523 *newmat = B; 524 PetscFunctionReturn(0); 525 } 526 527 #undef __FUNCT__ 528 #define __FUNCT__ "MatCreateSeqAIJMKL" 529 /*@C 530 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 531 This type inherits from AIJ and is largely identical, but uses sparse BLAS 532 routines from Intel MKL whenever possible. 533 Collective on MPI_Comm 534 535 Input Parameters: 536 + comm - MPI communicator, set to PETSC_COMM_SELF 537 . m - number of rows 538 . n - number of columns 539 . nz - number of nonzeros per row (same for all rows) 540 - nnz - array containing the number of nonzeros in the various rows 541 (possibly different for each row) or NULL 542 543 Output Parameter: 544 . A - the matrix 545 546 Notes: 547 If nnz is given then nz is ignored 548 549 Level: intermediate 550 551 .keywords: matrix, cray, sparse, parallel 552 553 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 554 @*/ 555 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 556 { 557 PetscErrorCode ierr; 558 559 PetscFunctionBegin; 560 ierr = MatCreate(comm,A);CHKERRQ(ierr); 561 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 562 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 563 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 #undef __FUNCT__ 568 #define __FUNCT__ "MatCreate_SeqAIJMKL" 569 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 570 { 571 PetscErrorCode ierr; 572 573 PetscFunctionBegin; 574 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 575 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578