1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 17 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 18 struct matrix_descr descr; 19 } Mat_SeqAIJMKL; 20 21 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 25 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 26 { 27 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 28 /* so we will ignore 'MatType type'. */ 29 PetscErrorCode ierr; 30 Mat B = *newmat; 31 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 32 33 PetscFunctionBegin; 34 if (reuse == MAT_INITIAL_MATRIX) { 35 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 36 } 37 38 /* Reset the original function pointers. */ 39 B->ops->duplicate = MatDuplicate_SeqAIJ; 40 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 41 B->ops->destroy = MatDestroy_SeqAIJ; 42 B->ops->mult = MatMult_SeqAIJ; 43 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 44 B->ops->multadd = MatMultAdd_SeqAIJ; 45 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 46 47 /* Free everything in the Mat_SeqAIJMKL data structure. 48 * We don't free the Mat_SeqAIJMKL struct itself, as this will 49 * cause problems later when MatDestroy() tries to free it. */ 50 /* Actually there is nothing to do here right now. 51 * When I've added use of the MKL SpMV2 inspector-executor routines, I should 52 * see if there is some way to clean up the "handle" used by SpMV2. */ 53 54 /* Change the type of B to MATSEQAIJ. */ 55 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 56 57 *newmat = B; 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 63 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 67 68 PetscFunctionBegin; 69 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 70 mkl_sparse_destroy(aijmkl->csrA); 71 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 72 73 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 74 * to destroy everything that remains. */ 75 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 76 /* Note that I don't call MatSetType(). I believe this is because that 77 * is only to be called when *building* a matrix. I could be wrong, but 78 * that is how things work for the SuperLU matrix class. */ 79 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 85 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 86 { 87 PetscErrorCode ierr; 88 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 89 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 90 91 PetscFunctionBegin; 92 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 93 ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 99 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 100 { 101 PetscErrorCode ierr; 102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 103 Mat_SeqAIJMKL *aijmkl; 104 105 MatScalar *aa; 106 PetscInt n; 107 PetscInt *aj,*ai; 108 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 109 PetscBool set; 110 111 PetscFunctionBegin; 112 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 113 114 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 115 * extra information and some different methods, call the AssemblyEnd 116 * routine for a MATSEQAIJ. 117 * I'm not sure if this is the best way to do this, but it avoids 118 * a lot of code duplication. 119 * I also note that currently MATSEQAIJMKL doesn't know anything about 120 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 121 * are many zero rows. If the SeqAIJ assembly end routine decides to use 122 * this, this may break things. (Don't know... haven't looked at it. 123 * Do I need to disable this somehow?) */ 124 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 125 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 126 127 aijmkl = (Mat_SeqAIJMKL*) A->spptr; 128 if (!aijmkl->no_SpMV2) { 129 /* Now perform the SpMV2 setup and matrix optimization. */ 130 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 131 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 132 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 133 n = A->rmap->n; 134 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 135 aa = a->a; /* Nonzero elements stored row-by-row. */ 136 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 137 stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa); 138 stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000); 139 stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE); 140 stat = mkl_sparse_optimize(aijmkl->csrA); 141 if (stat != SPARSE_STATUS_SUCCESS) { 142 PetscFunctionReturn(PETSC_ERR_LIB); 143 } 144 } 145 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMult_SeqAIJMKL" 151 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 152 { 153 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 154 const PetscScalar *x; 155 PetscScalar *y; 156 const MatScalar *aa; 157 PetscErrorCode ierr; 158 PetscInt m=A->rmap->n; 159 const PetscInt *aj,*ai; 160 PetscInt i; 161 162 /* Variables not in MatMult_SeqAIJ. */ 163 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 164 165 PetscFunctionBegin; 166 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 167 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 168 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 169 aa = a->a; /* Nonzero elements stored row-by-row. */ 170 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 171 172 /* Call MKL sparse BLAS routine to do the MatMult. */ 173 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 174 175 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 176 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 177 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2" 183 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 184 { 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 186 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 187 const PetscScalar *x; 188 PetscScalar *y; 189 const MatScalar *aa; 190 PetscErrorCode ierr; 191 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 192 193 PetscFunctionBegin; 194 195 #ifdef DEBUG 196 printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n"); 197 #endif 198 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 199 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 200 201 /* Call MKL SpMV2 executor routine to do the MatMult. */ 202 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 203 204 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 205 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 206 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 207 if (stat != SPARSE_STATUS_SUCCESS) { 208 PetscFunctionReturn(PETSC_ERR_LIB); 209 } 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL" 215 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy) 216 { 217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 218 const PetscScalar *x; 219 PetscScalar *y; 220 const MatScalar *aa; 221 PetscErrorCode ierr; 222 PetscInt m=A->rmap->n; 223 const PetscInt *aj,*ai; 224 PetscInt i; 225 226 /* Variables not in MatMultTranspose_SeqAIJ. */ 227 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 228 229 PetscFunctionBegin; 230 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 231 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 233 aa = a->a; /* Nonzero elements stored row-by-row. */ 234 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 235 236 /* Call MKL sparse BLAS routine to do the MatMult. */ 237 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 238 239 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 240 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2" 247 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy) 248 { 249 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 250 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 251 const PetscScalar *x; 252 PetscScalar *y; 253 const MatScalar *aa; 254 PetscErrorCode ierr; 255 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 256 257 PetscFunctionBegin; 258 259 #ifdef DEBUG 260 printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n"); 261 #endif 262 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 263 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 264 265 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 266 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 267 268 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 269 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 270 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 271 if (stat != SPARSE_STATUS_SUCCESS) { 272 PetscFunctionReturn(PETSC_ERR_LIB); 273 } 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 279 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 280 { 281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 282 const PetscScalar *x; 283 PetscScalar *y,*z; 284 const MatScalar *aa; 285 PetscErrorCode ierr; 286 PetscInt m=A->rmap->n; 287 const PetscInt *aj,*ai; 288 PetscInt i; 289 290 /* Variables not in MatMultAdd_SeqAIJ. */ 291 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 292 PetscScalar alpha = 1.0; 293 PetscScalar beta = 1.0; 294 char matdescra[6]; 295 296 PetscFunctionBegin; 297 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 298 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 299 300 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 301 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 302 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 303 aa = a->a; /* Nonzero elements stored row-by-row. */ 304 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 305 306 /* Call MKL sparse BLAS routine to do the MatMult. */ 307 if (zz == yy) { 308 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 309 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 310 } else { 311 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 312 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 313 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 314 for (i=0; i<m; i++) { 315 z[i] += y[i]; 316 } 317 } 318 319 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 320 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 321 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324 325 #undef __FUNCT__ 326 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2" 327 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 328 { 329 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 330 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 331 const PetscScalar *x; 332 PetscScalar *y,*z; 333 const MatScalar *aa; 334 PetscErrorCode ierr; 335 PetscInt m=A->rmap->n; 336 const PetscInt *aj,*ai; 337 PetscInt i; 338 339 /* Variables not in MatMultAdd_SeqAIJ. */ 340 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 341 342 PetscFunctionBegin; 343 344 #ifdef DEBUG 345 printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n"); 346 #endif 347 348 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 349 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 350 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 351 aa = a->a; /* Nonzero elements stored row-by-row. */ 352 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 353 354 /* Call MKL sparse BLAS routine to do the MatMult. */ 355 if (zz == yy) { 356 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 357 * with alpha and beta both set to 1.0. */ 358 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 359 } else { 360 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 361 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 362 stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 363 for (i=0; i<m; i++) { 364 z[i] += y[i]; 365 } 366 } 367 368 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 369 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 370 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 371 if (stat != SPARSE_STATUS_SUCCESS) { 372 PetscFunctionReturn(PETSC_ERR_LIB); 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #undef __FUNCT__ 378 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL" 379 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 380 { 381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 382 const PetscScalar *x; 383 PetscScalar *y,*z; 384 const MatScalar *aa; 385 PetscErrorCode ierr; 386 PetscInt m=A->rmap->n; 387 const PetscInt *aj,*ai; 388 PetscInt i; 389 390 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 391 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 392 PetscScalar alpha = 1.0; 393 PetscScalar beta = 1.0; 394 char matdescra[6]; 395 396 PetscFunctionBegin; 397 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 398 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 399 400 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 401 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 402 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 403 aa = a->a; /* Nonzero elements stored row-by-row. */ 404 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 405 406 /* Call MKL sparse BLAS routine to do the MatMult. */ 407 if (zz == yy) { 408 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 409 mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y); 410 } else { 411 /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then 412 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 413 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 414 for (i=0; i<m; i++) { 415 z[i] += y[i]; 416 } 417 } 418 419 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 420 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 421 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2" 427 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz) 428 { 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 430 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 431 const PetscScalar *x; 432 PetscScalar *y,*z; 433 const MatScalar *aa; 434 PetscErrorCode ierr; 435 PetscInt m=A->rmap->n; 436 const PetscInt *aj,*ai; 437 PetscInt i; 438 439 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 440 sparse_status_t stat = SPARSE_STATUS_SUCCESS; 441 442 PetscFunctionBegin; 443 444 #ifdef DEBUG 445 printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n"); 446 #endif 447 448 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 450 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 451 aa = a->a; /* Nonzero elements stored row-by-row. */ 452 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 453 454 /* Call MKL sparse BLAS routine to do the MatMult. */ 455 if (zz == yy) { 456 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 457 * with alpha and beta both set to 1.0. */ 458 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y); 459 } else { 460 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 461 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 462 stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y); 463 for (i=0; i<m; i++) { 464 z[i] += y[i]; 465 } 466 } 467 468 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 469 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 470 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 471 if (stat != SPARSE_STATUS_SUCCESS) { 472 PetscFunctionReturn(PETSC_ERR_LIB); 473 } 474 PetscFunctionReturn(0); 475 } 476 477 478 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 479 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 480 * routine, but can also be used to convert an assembled SeqAIJ matrix 481 * into a SeqAIJMKL one. */ 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 484 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 485 { 486 PetscErrorCode ierr; 487 Mat B = *newmat; 488 Mat_SeqAIJMKL *aijmkl; 489 PetscBool set; 490 491 PetscFunctionBegin; 492 if (reuse == MAT_INITIAL_MATRIX) { 493 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 494 } 495 496 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 497 B->spptr = (void*) aijmkl; 498 499 /* Set function pointers for methods that we inherit from AIJ but override. 500 * Currently the transposed operations are not being set because I encounter memory corruption 501 * when these are enabled. Need to look at this with Valgrind or similar. --RTM */ 502 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 503 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 504 B->ops->destroy = MatDestroy_SeqAIJMKL; 505 506 /* Parse command line options. */ 507 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines. */ 508 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr); 509 ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr); 510 ierr = PetscOptionsEnd();CHKERRQ(ierr); 511 512 if(!aijmkl->no_SpMV2) { 513 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 514 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; */ 515 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 516 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */ 517 } else { 518 B->ops->mult = MatMult_SeqAIJMKL; 519 /* B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; */ 520 B->ops->multadd = MatMultAdd_SeqAIJMKL; 521 /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */ 522 } 523 524 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 525 526 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 527 *newmat = B; 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatCreateSeqAIJMKL" 533 /*@C 534 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 535 This type inherits from AIJ and is largely identical, but uses sparse BLAS 536 routines from Intel MKL whenever possible. 537 Collective on MPI_Comm 538 539 Input Parameters: 540 + comm - MPI communicator, set to PETSC_COMM_SELF 541 . m - number of rows 542 . n - number of columns 543 . nz - number of nonzeros per row (same for all rows) 544 - nnz - array containing the number of nonzeros in the various rows 545 (possibly different for each row) or NULL 546 547 Output Parameter: 548 . A - the matrix 549 550 Notes: 551 If nnz is given then nz is ignored 552 553 Level: intermediate 554 555 .keywords: matrix, cray, sparse, parallel 556 557 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 558 @*/ 559 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 560 { 561 PetscErrorCode ierr; 562 563 PetscFunctionBegin; 564 ierr = MatCreate(comm,A);CHKERRQ(ierr); 565 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 566 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 567 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatCreate_SeqAIJMKL" 573 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 574 { 575 PetscErrorCode ierr; 576 577 PetscFunctionBegin; 578 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 579 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582