xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 54871a98d0ef4c68e7311a77dff96962372cd4af)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   /* "Handle" used by SpMV2 inspector-executor routines. */
17   sparse_matrix_t csrA;
18 } Mat_SeqAIJMKL;
19 
20 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
25 {
26   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
27   /* so we will ignore 'MatType type'. */
28   PetscErrorCode ierr;
29   Mat            B       = *newmat;
30   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
31 
32   PetscFunctionBegin;
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
35   }
36 
37   /* Reset the original function pointers. */
38   B->ops->duplicate   = MatDuplicate_SeqAIJ;
39   B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
40   B->ops->destroy     = MatDestroy_SeqAIJ;
41   B->ops->mult        = MatMult_SeqAIJ;
42   B->ops->multadd     = MatMultAdd_SeqAIJ;
43 
44   /* Free everything in the Mat_SeqAIJMKL data structure.
45    * We don't free the Mat_SeqAIJMKL struct itself, as this will
46    * cause problems later when MatDestroy() tries to free it. */
47   /* Actually there is nothing to do here right now.
48    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
49    * see if there is some way to clean up the "handle" used by SpMV2. */
50 
51   /* Change the type of B to MATSEQAIJ. */
52   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
53 
54   *newmat = B;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
60 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
61 {
62   PetscErrorCode ierr;
63   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
64 
65   PetscFunctionBegin;
66   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
67   mkl_sparse_destroy(aijmkl->csrA);
68   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
69 
70   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
71    * to destroy everything that remains. */
72   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
73   /* Note that I don't call MatSetType().  I believe this is because that
74    * is only to be called when *building* a matrix.  I could be wrong, but
75    * that is how things work for the SuperLU matrix class. */
76   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
82 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
83 {
84   PetscErrorCode ierr;
85   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
86   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
87 
88   PetscFunctionBegin;
89   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
90   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
96 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
97 {
98   PetscErrorCode ierr;
99   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
100 
101   PetscFunctionBegin;
102   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
103 
104   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
105    * extra information and some different methods, call the AssemblyEnd
106    * routine for a MATSEQAIJ.
107    * I'm not sure if this is the best way to do this, but it avoids
108    * a lot of code duplication.
109    * I also note that currently MATSEQAIJMKL doesn't know anything about
110    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
111    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
112    * this, this may break things.  (Don't know... haven't looked at it.
113    * Do I need to disable this somehow?) */
114   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
115   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
116 
117   PetscFunctionReturn(0);
118 }
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "MatMult_SeqAIJMKL"
122 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
123 {
124   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
125   const PetscScalar *x;
126   PetscScalar       *y;
127   const MatScalar   *aa;
128   PetscErrorCode    ierr;
129   PetscInt          m=A->rmap->n;
130   const PetscInt    *aj,*ai;
131   PetscInt          i;
132 
133   /* Variables not in MatMult_SeqAIJ. */
134   char transa = 'n';  /* Used to indicate to MKL that we are computing the transpose product. */
135 
136   PetscFunctionBegin;
137   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
138   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
139   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
140   aa   = a->a;  /* Nonzero elements stored row-by-row. */
141   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
142 
143   /* Call MKL sparse BLAS routine to do the MatMult. */
144   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
145 
146   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
147   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
148   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
154 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
155 {
156   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
157   const PetscScalar *x;
158   PetscScalar       *y,*z;
159   const MatScalar   *aa;
160   PetscErrorCode    ierr;
161   PetscInt          m=A->rmap->n;
162   const PetscInt    *aj,*ai;
163   PetscInt          i;
164 
165   /* Variables not in MatMult_SeqAIJ. */
166   char transa = 'n';  /* Used to indicate to MKL that we are computing the transpose product. */
167 
168   PetscFunctionBegin;
169   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
170   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
171   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
172   aa   = a->a;  /* Nonzero elements stored row-by-row. */
173   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
174 
175   /* Call MKL sparse BLAS routine to do the MatMult. */
176   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
177 
178   /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */
179   for (i=0; i<m; i++) {
180     z[i] += y[i];
181   }
182 
183   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
184   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
185   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
186   PetscFunctionReturn(0);
187 }
188 
189 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
190  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
191  * routine, but can also be used to convert an assembled SeqAIJ matrix
192  * into a SeqAIJMKL one. */
193 #undef __FUNCT__
194 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
195 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
196 {
197   PetscErrorCode ierr;
198   Mat            B = *newmat;
199   Mat_SeqAIJMKL *aijmkl;
200 
201   PetscFunctionBegin;
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
204   }
205 
206   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
207   B->spptr = (void*) aijmkl;
208 
209   /* Set function pointers for methods that we inherit from AIJ but override. */
210   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
211   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
212   B->ops->destroy     = MatDestroy_SeqAIJMKL;
213   B->ops->mult        = MatMult_SeqAIJMKL;
214   B->ops->multadd     = MatMultAdd_SeqAIJMKL;
215 
216   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
217 
218   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
219   *newmat = B;
220   PetscFunctionReturn(0);
221 }
222 
223 #undef __FUNCT__
224 #define __FUNCT__ "MatCreateSeqAIJMKL"
225 /*@C
226    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
227    This type inherits from AIJ and is largely identical, but uses sparse BLAS
228    routines from Intel MKL whenever possible.
229    Collective on MPI_Comm
230 
231    Input Parameters:
232 +  comm - MPI communicator, set to PETSC_COMM_SELF
233 .  m - number of rows
234 .  n - number of columns
235 .  nz - number of nonzeros per row (same for all rows)
236 -  nnz - array containing the number of nonzeros in the various rows
237          (possibly different for each row) or NULL
238 
239    Output Parameter:
240 .  A - the matrix
241 
242    Notes:
243    If nnz is given then nz is ignored
244 
245    Level: intermediate
246 
247 .keywords: matrix, cray, sparse, parallel
248 
249 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
250 @*/
251 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   ierr = MatCreate(comm,A);CHKERRQ(ierr);
257   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
258   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
259   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatCreate_SeqAIJMKL"
265 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
266 {
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
271   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 
276