1*4a2a386eSRichard Tran Mills /* 2*4a2a386eSRichard Tran Mills Defines basic operations for the MATSEQAIJMKL matrix class. 3*4a2a386eSRichard Tran Mills This class is derived from the MATSEQAIJ class and retains the 4*4a2a386eSRichard Tran Mills compressed row storage (aka Yale sparse matrix format) but uses 5*4a2a386eSRichard Tran Mills sparse BLAS operations from the Intel Math Kernel Library (MKL) 6*4a2a386eSRichard Tran Mills wherever possible. 7*4a2a386eSRichard Tran Mills */ 8*4a2a386eSRichard Tran Mills 9*4a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h> 10*4a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11*4a2a386eSRichard Tran Mills 12*4a2a386eSRichard Tran Mills /* MKL include files. */ 13*4a2a386eSRichard Tran Mills #include <mkl_spblas.h> /* Sparse BLAS */ 14*4a2a386eSRichard Tran Mills 15*4a2a386eSRichard Tran Mills typedef struct { 16*4a2a386eSRichard Tran Mills /* "Handle" used by SpMV2 inspector-executor routines. */ 17*4a2a386eSRichard Tran Mills sparse_matrix_t csrA; 18*4a2a386eSRichard Tran Mills } Mat_SeqAIJMKL; 19*4a2a386eSRichard Tran Mills 20*4a2a386eSRichard Tran Mills extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 21*4a2a386eSRichard Tran Mills 22*4a2a386eSRichard Tran Mills #undef __FUNCT__ 23*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 24*4a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 25*4a2a386eSRichard Tran Mills { 26*4a2a386eSRichard Tran Mills /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 27*4a2a386eSRichard Tran Mills /* so we will ignore 'MatType type'. */ 28*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 29*4a2a386eSRichard Tran Mills Mat B = *newmat; 30*4a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 31*4a2a386eSRichard Tran Mills 32*4a2a386eSRichard Tran Mills PetscFunctionBegin; 33*4a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 34*4a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 35*4a2a386eSRichard Tran Mills } 36*4a2a386eSRichard Tran Mills 37*4a2a386eSRichard Tran Mills /* Reset the original function pointers. */ 38*4a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 39*4a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJ; 40*4a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJ; 41*4a2a386eSRichard Tran Mills 42*4a2a386eSRichard Tran Mills /* Free everything in the Mat_SeqAIJMKL data structure. 43*4a2a386eSRichard Tran Mills * We don't free the Mat_SeqAIJMKL struct itself, as this will 44*4a2a386eSRichard Tran Mills * cause problems later when MatDestroy() tries to free it. */ 45*4a2a386eSRichard Tran Mills /* Actually there is nothing to do here right now. 46*4a2a386eSRichard Tran Mills * When I've added use of the MKL SpMV2 inspector-executor routines, I should 47*4a2a386eSRichard Tran Mills * see if there is some way to clean up the "handle" used by SpMV2. */ 48*4a2a386eSRichard Tran Mills 49*4a2a386eSRichard Tran Mills /* Change the type of B to MATSEQAIJ. */ 50*4a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 51*4a2a386eSRichard Tran Mills 52*4a2a386eSRichard Tran Mills *newmat = B; 53*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 54*4a2a386eSRichard Tran Mills } 55*4a2a386eSRichard Tran Mills 56*4a2a386eSRichard Tran Mills #undef __FUNCT__ 57*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatDestroy_SeqAIJMKL" 58*4a2a386eSRichard Tran Mills PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 59*4a2a386eSRichard Tran Mills { 60*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 61*4a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 62*4a2a386eSRichard Tran Mills 63*4a2a386eSRichard Tran Mills PetscFunctionBegin; 64*4a2a386eSRichard Tran Mills /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 65*4a2a386eSRichard Tran Mills mkl_sparse_destroy(aijmkl->csrA); 66*4a2a386eSRichard Tran Mills ierr = PetscFree(A->spptr);CHKERRQ(ierr); 67*4a2a386eSRichard Tran Mills 68*4a2a386eSRichard Tran Mills /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 69*4a2a386eSRichard Tran Mills * to destroy everything that remains. */ 70*4a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 71*4a2a386eSRichard Tran Mills /* Note that I don't call MatSetType(). I believe this is because that 72*4a2a386eSRichard Tran Mills * is only to be called when *building* a matrix. I could be wrong, but 73*4a2a386eSRichard Tran Mills * that is how things work for the SuperLU matrix class. */ 74*4a2a386eSRichard Tran Mills ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 75*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 76*4a2a386eSRichard Tran Mills } 77*4a2a386eSRichard Tran Mills 78*4a2a386eSRichard Tran Mills #undef __FUNCT__ 79*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 80*4a2a386eSRichard Tran Mills PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 81*4a2a386eSRichard Tran Mills { 82*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 83*4a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 84*4a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 85*4a2a386eSRichard Tran Mills 86*4a2a386eSRichard Tran Mills PetscFunctionBegin; 87*4a2a386eSRichard Tran Mills ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 88*4a2a386eSRichard Tran Mills ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 89*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 90*4a2a386eSRichard Tran Mills } 91*4a2a386eSRichard Tran Mills 92*4a2a386eSRichard Tran Mills #undef __FUNCT__ 93*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 94*4a2a386eSRichard Tran Mills PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 95*4a2a386eSRichard Tran Mills { 96*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 97*4a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 98*4a2a386eSRichard Tran Mills 99*4a2a386eSRichard Tran Mills PetscFunctionBegin; 100*4a2a386eSRichard Tran Mills if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 101*4a2a386eSRichard Tran Mills 102*4a2a386eSRichard Tran Mills /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 103*4a2a386eSRichard Tran Mills * extra information and some different methods, call the AssemblyEnd 104*4a2a386eSRichard Tran Mills * routine for a MATSEQAIJ. 105*4a2a386eSRichard Tran Mills * I'm not sure if this is the best way to do this, but it avoids 106*4a2a386eSRichard Tran Mills * a lot of code duplication. 107*4a2a386eSRichard Tran Mills * I also note that currently MATSEQAIJMKL doesn't know anything about 108*4a2a386eSRichard Tran Mills * the Mat_CompressedRow data structure that SeqAIJ now uses when there 109*4a2a386eSRichard Tran Mills * are many zero rows. If the SeqAIJ assembly end routine decides to use 110*4a2a386eSRichard Tran Mills * this, this may break things. (Don't know... haven't looked at it. 111*4a2a386eSRichard Tran Mills * Do I need to disable this somehow?) */ 112*4a2a386eSRichard Tran Mills a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 113*4a2a386eSRichard Tran Mills ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 114*4a2a386eSRichard Tran Mills 115*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 116*4a2a386eSRichard Tran Mills } 117*4a2a386eSRichard Tran Mills 118*4a2a386eSRichard Tran Mills #undef __FUNCT__ 119*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatMult_SeqAIJMKL" 120*4a2a386eSRichard Tran Mills PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 121*4a2a386eSRichard Tran Mills { 122*4a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 123*4a2a386eSRichard Tran Mills const PetscScalar *x; 124*4a2a386eSRichard Tran Mills PetscScalar *y; 125*4a2a386eSRichard Tran Mills const MatScalar *aa; 126*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 127*4a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 128*4a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 129*4a2a386eSRichard Tran Mills PetscInt i; 130*4a2a386eSRichard Tran Mills 131*4a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 132*4a2a386eSRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are computing the transpose product. */ 133*4a2a386eSRichard Tran Mills 134*4a2a386eSRichard Tran Mills PetscFunctionBegin; 135*4a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 136*4a2a386eSRichard Tran Mills ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 137*4a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 138*4a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 139*4a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 140*4a2a386eSRichard Tran Mills 141*4a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 142*4a2a386eSRichard Tran Mills mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 143*4a2a386eSRichard Tran Mills 144*4a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 145*4a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 146*4a2a386eSRichard Tran Mills ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 147*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 148*4a2a386eSRichard Tran Mills } 149*4a2a386eSRichard Tran Mills 150*4a2a386eSRichard Tran Mills #undef __FUNCT__ 151*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 152*4a2a386eSRichard Tran Mills PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 153*4a2a386eSRichard Tran Mills { 154*4a2a386eSRichard Tran Mills Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 155*4a2a386eSRichard Tran Mills const PetscScalar *x; 156*4a2a386eSRichard Tran Mills PetscScalar *y,*z; 157*4a2a386eSRichard Tran Mills const MatScalar *aa; 158*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 159*4a2a386eSRichard Tran Mills PetscInt m=A->rmap->n; 160*4a2a386eSRichard Tran Mills const PetscInt *aj,*ai; 161*4a2a386eSRichard Tran Mills PetscInt i; 162*4a2a386eSRichard Tran Mills 163*4a2a386eSRichard Tran Mills /* Variables not in MatMult_SeqAIJ. */ 164*4a2a386eSRichard Tran Mills char transa = 'n'; /* Used to indicate to MKL that we are computing the transpose product. */ 165*4a2a386eSRichard Tran Mills 166*4a2a386eSRichard Tran Mills PetscFunctionBegin; 167*4a2a386eSRichard Tran Mills ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 168*4a2a386eSRichard Tran Mills ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 169*4a2a386eSRichard Tran Mills aj = a->j; /* aj[k] gives column index for element aa[k]. */ 170*4a2a386eSRichard Tran Mills aa = a->a; /* Nonzero elements stored row-by-row. */ 171*4a2a386eSRichard Tran Mills ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 172*4a2a386eSRichard Tran Mills 173*4a2a386eSRichard Tran Mills /* Call MKL sparse BLAS routine to do the MatMult. */ 174*4a2a386eSRichard Tran Mills mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 175*4a2a386eSRichard Tran Mills 176*4a2a386eSRichard Tran Mills /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 177*4a2a386eSRichard Tran Mills for (i=0; i<m; i++) { 178*4a2a386eSRichard Tran Mills z[i] += y[i]; 179*4a2a386eSRichard Tran Mills } 180*4a2a386eSRichard Tran Mills 181*4a2a386eSRichard Tran Mills ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 182*4a2a386eSRichard Tran Mills ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 183*4a2a386eSRichard Tran Mills ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 184*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 185*4a2a386eSRichard Tran Mills } 186*4a2a386eSRichard Tran Mills 187*4a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 188*4a2a386eSRichard Tran Mills * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 189*4a2a386eSRichard Tran Mills * routine, but can also be used to convert an assembled SeqAIJ matrix 190*4a2a386eSRichard Tran Mills * into a SeqAIJMKL one. */ 191*4a2a386eSRichard Tran Mills #undef __FUNCT__ 192*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 193*4a2a386eSRichard Tran Mills PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 194*4a2a386eSRichard Tran Mills { 195*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 196*4a2a386eSRichard Tran Mills Mat B = *newmat; 197*4a2a386eSRichard Tran Mills Mat_SeqAIJMKL *aijmkl; 198*4a2a386eSRichard Tran Mills 199*4a2a386eSRichard Tran Mills PetscFunctionBegin; 200*4a2a386eSRichard Tran Mills if (reuse == MAT_INITIAL_MATRIX) { 201*4a2a386eSRichard Tran Mills ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 202*4a2a386eSRichard Tran Mills } 203*4a2a386eSRichard Tran Mills 204*4a2a386eSRichard Tran Mills ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 205*4a2a386eSRichard Tran Mills B->spptr = (void*) aijmkl; 206*4a2a386eSRichard Tran Mills 207*4a2a386eSRichard Tran Mills /* Set function pointers for methods that we inherit from AIJ but override. */ 208*4a2a386eSRichard Tran Mills B->ops->duplicate = MatDuplicate_SeqAIJMKL; 209*4a2a386eSRichard Tran Mills B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 210*4a2a386eSRichard Tran Mills B->ops->destroy = MatDestroy_SeqAIJMKL; 211*4a2a386eSRichard Tran Mills B->ops->mult = MatMult_SeqAIJMKL; 212*4a2a386eSRichard Tran Mills B->ops->multadd = MatMultAdd_SeqAIJMKL; 213*4a2a386eSRichard Tran Mills 214*4a2a386eSRichard Tran Mills ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 215*4a2a386eSRichard Tran Mills 216*4a2a386eSRichard Tran Mills ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 217*4a2a386eSRichard Tran Mills *newmat = B; 218*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 219*4a2a386eSRichard Tran Mills } 220*4a2a386eSRichard Tran Mills 221*4a2a386eSRichard Tran Mills #undef __FUNCT__ 222*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatCreateSeqAIJMKL" 223*4a2a386eSRichard Tran Mills /*@C 224*4a2a386eSRichard Tran Mills MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 225*4a2a386eSRichard Tran Mills This type inherits from AIJ and is largely identical, but uses sparse BLAS 226*4a2a386eSRichard Tran Mills routines from Intel MKL whenever possible. 227*4a2a386eSRichard Tran Mills Collective on MPI_Comm 228*4a2a386eSRichard Tran Mills 229*4a2a386eSRichard Tran Mills Input Parameters: 230*4a2a386eSRichard Tran Mills + comm - MPI communicator, set to PETSC_COMM_SELF 231*4a2a386eSRichard Tran Mills . m - number of rows 232*4a2a386eSRichard Tran Mills . n - number of columns 233*4a2a386eSRichard Tran Mills . nz - number of nonzeros per row (same for all rows) 234*4a2a386eSRichard Tran Mills - nnz - array containing the number of nonzeros in the various rows 235*4a2a386eSRichard Tran Mills (possibly different for each row) or NULL 236*4a2a386eSRichard Tran Mills 237*4a2a386eSRichard Tran Mills Output Parameter: 238*4a2a386eSRichard Tran Mills . A - the matrix 239*4a2a386eSRichard Tran Mills 240*4a2a386eSRichard Tran Mills Notes: 241*4a2a386eSRichard Tran Mills If nnz is given then nz is ignored 242*4a2a386eSRichard Tran Mills 243*4a2a386eSRichard Tran Mills Level: intermediate 244*4a2a386eSRichard Tran Mills 245*4a2a386eSRichard Tran Mills .keywords: matrix, cray, sparse, parallel 246*4a2a386eSRichard Tran Mills 247*4a2a386eSRichard Tran Mills .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 248*4a2a386eSRichard Tran Mills @*/ 249*4a2a386eSRichard Tran Mills PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 250*4a2a386eSRichard Tran Mills { 251*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 252*4a2a386eSRichard Tran Mills 253*4a2a386eSRichard Tran Mills PetscFunctionBegin; 254*4a2a386eSRichard Tran Mills ierr = MatCreate(comm,A);CHKERRQ(ierr); 255*4a2a386eSRichard Tran Mills ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 256*4a2a386eSRichard Tran Mills ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 257*4a2a386eSRichard Tran Mills ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 258*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 259*4a2a386eSRichard Tran Mills } 260*4a2a386eSRichard Tran Mills 261*4a2a386eSRichard Tran Mills #undef __FUNCT__ 262*4a2a386eSRichard Tran Mills #define __FUNCT__ "MatCreate_SeqAIJMKL" 263*4a2a386eSRichard Tran Mills PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 264*4a2a386eSRichard Tran Mills { 265*4a2a386eSRichard Tran Mills PetscErrorCode ierr; 266*4a2a386eSRichard Tran Mills 267*4a2a386eSRichard Tran Mills PetscFunctionBegin; 268*4a2a386eSRichard Tran Mills ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 269*4a2a386eSRichard Tran Mills ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 270*4a2a386eSRichard Tran Mills PetscFunctionReturn(0); 271*4a2a386eSRichard Tran Mills } 272*4a2a386eSRichard Tran Mills 273*4a2a386eSRichard Tran Mills 274