1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 12 /* MKL include files. */ 13 #include <mkl_spblas.h> /* Sparse BLAS */ 14 15 typedef struct { 16 /* "Handle" used by SpMV2 inspector-executor routines. */ 17 sparse_matrix_t csrA; 18 } Mat_SeqAIJMKL; 19 20 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType); 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ" 24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 25 { 26 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 27 /* so we will ignore 'MatType type'. */ 28 PetscErrorCode ierr; 29 Mat B = *newmat; 30 Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr; 31 32 PetscFunctionBegin; 33 if (reuse == MAT_INITIAL_MATRIX) { 34 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 35 } 36 37 /* Reset the original function pointers. */ 38 B->ops->duplicate = MatDuplicate_SeqAIJ; 39 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 40 B->ops->destroy = MatDestroy_SeqAIJ; 41 B->ops->mult = MatMult_SeqAIJ; 42 B->ops->multadd = MatMultAdd_SeqAIJ; 43 44 /* Free everything in the Mat_SeqAIJMKL data structure. 45 * We don't free the Mat_SeqAIJMKL struct itself, as this will 46 * cause problems later when MatDestroy() tries to free it. */ 47 /* Actually there is nothing to do here right now. 48 * When I've added use of the MKL SpMV2 inspector-executor routines, I should 49 * see if there is some way to clean up the "handle" used by SpMV2. */ 50 51 /* Change the type of B to MATSEQAIJ. */ 52 ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr); 53 54 *newmat = B; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDestroy_SeqAIJMKL" 60 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 61 { 62 PetscErrorCode ierr; 63 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 64 65 PetscFunctionBegin; 66 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 67 mkl_sparse_destroy(aijmkl->csrA); 68 ierr = PetscFree(A->spptr);CHKERRQ(ierr); 69 70 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 71 * to destroy everything that remains. */ 72 ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr); 73 /* Note that I don't call MatSetType(). I believe this is because that 74 * is only to be called when *building* a matrix. I could be wrong, but 75 * that is how things work for the SuperLU matrix class. */ 76 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "MatDuplicate_SeqAIJMKL" 82 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 83 { 84 PetscErrorCode ierr; 85 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr; 86 Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr; 87 88 PetscFunctionBegin; 89 ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr); 90 ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL" 96 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 100 101 PetscFunctionBegin; 102 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 103 104 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 105 * extra information and some different methods, call the AssemblyEnd 106 * routine for a MATSEQAIJ. 107 * I'm not sure if this is the best way to do this, but it avoids 108 * a lot of code duplication. 109 * I also note that currently MATSEQAIJMKL doesn't know anything about 110 * the Mat_CompressedRow data structure that SeqAIJ now uses when there 111 * are many zero rows. If the SeqAIJ assembly end routine decides to use 112 * this, this may break things. (Don't know... haven't looked at it. 113 * Do I need to disable this somehow?) */ 114 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 115 ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr); 116 117 PetscFunctionReturn(0); 118 } 119 120 #undef __FUNCT__ 121 #define __FUNCT__ "MatMult_SeqAIJMKL" 122 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy) 123 { 124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 125 const PetscScalar *x; 126 PetscScalar *y; 127 const MatScalar *aa; 128 PetscErrorCode ierr; 129 PetscInt m=A->rmap->n; 130 const PetscInt *aj,*ai; 131 PetscInt i; 132 133 /* Variables not in MatMult_SeqAIJ. */ 134 char transa = 'n'; /* Used to indicate to MKL that we are computing the transpose product. */ 135 136 PetscFunctionBegin; 137 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 138 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 139 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 140 aa = a->a; /* Nonzero elements stored row-by-row. */ 141 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 142 143 /* Call MKL sparse BLAS routine to do the MatMult. */ 144 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y); 145 146 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 147 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 148 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 #undef __FUNCT__ 153 #define __FUNCT__ "MatMultAdd_SeqAIJMKL" 154 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz) 155 { 156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 157 const PetscScalar *x; 158 PetscScalar *y,*z; 159 const MatScalar *aa; 160 PetscErrorCode ierr; 161 PetscInt m=A->rmap->n; 162 const PetscInt *aj,*ai; 163 PetscInt i; 164 165 /* Variables not in MatMult_SeqAIJ. */ 166 char transa = 'n'; /* Used to indicate to MKL that we are computing the transpose product. */ 167 168 PetscFunctionBegin; 169 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 170 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 171 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 172 aa = a->a; /* Nonzero elements stored row-by-row. */ 173 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 174 175 /* Call MKL sparse BLAS routine to do the MatMult. */ 176 mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z); 177 178 /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 179 for (i=0; i<m; i++) { 180 z[i] += y[i]; 181 } 182 183 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 184 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 185 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 186 PetscFunctionReturn(0); 187 } 188 189 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 190 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ() 191 * routine, but can also be used to convert an assembled SeqAIJ matrix 192 * into a SeqAIJMKL one. */ 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL" 195 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 196 { 197 PetscErrorCode ierr; 198 Mat B = *newmat; 199 Mat_SeqAIJMKL *aijmkl; 200 201 PetscFunctionBegin; 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 204 } 205 206 ierr = PetscNewLog(B,&aijmkl);CHKERRQ(ierr); 207 B->spptr = (void*) aijmkl; 208 209 /* Set function pointers for methods that we inherit from AIJ but override. */ 210 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 211 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 212 B->ops->destroy = MatDestroy_SeqAIJMKL; 213 B->ops->mult = MatMult_SeqAIJMKL; 214 B->ops->multadd = MatMultAdd_SeqAIJMKL; 215 216 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr); 217 218 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr); 219 *newmat = B; 220 PetscFunctionReturn(0); 221 } 222 223 #undef __FUNCT__ 224 #define __FUNCT__ "MatCreateSeqAIJMKL" 225 /*@C 226 MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL. 227 This type inherits from AIJ and is largely identical, but uses sparse BLAS 228 routines from Intel MKL whenever possible. 229 Collective on MPI_Comm 230 231 Input Parameters: 232 + comm - MPI communicator, set to PETSC_COMM_SELF 233 . m - number of rows 234 . n - number of columns 235 . nz - number of nonzeros per row (same for all rows) 236 - nnz - array containing the number of nonzeros in the various rows 237 (possibly different for each row) or NULL 238 239 Output Parameter: 240 . A - the matrix 241 242 Notes: 243 If nnz is given then nz is ignored 244 245 Level: intermediate 246 247 .keywords: matrix, cray, sparse, parallel 248 249 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues() 250 @*/ 251 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 ierr = MatCreate(comm,A);CHKERRQ(ierr); 257 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 258 ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr); 259 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatCreate_SeqAIJMKL" 265 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 266 { 267 PetscErrorCode ierr; 268 269 PetscFunctionBegin; 270 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 271 ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 276