xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision d995685e3253571fb7590f3ef838bd2bd64c935f)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
18   struct matrix_descr descr;
19 } Mat_SeqAIJMKL;
20 
21 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
25 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
26 {
27   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
28   /* so we will ignore 'MatType type'. */
29   PetscErrorCode ierr;
30   Mat            B       = *newmat;
31   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
32 
33   PetscFunctionBegin;
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
36   }
37 
38   /* Reset the original function pointers. */
39   B->ops->duplicate        = MatDuplicate_SeqAIJ;
40   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
41   B->ops->destroy          = MatDestroy_SeqAIJ;
42   B->ops->mult             = MatMult_SeqAIJ;
43   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
44   B->ops->multadd          = MatMultAdd_SeqAIJ;
45   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
46 
47   /* Free everything in the Mat_SeqAIJMKL data structure.
48    * We don't free the Mat_SeqAIJMKL struct itself, as this will
49    * cause problems later when MatDestroy() tries to free it. */
50   /* Actually there is nothing to do here right now.
51    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
52    * see if there is some way to clean up the "handle" used by SpMV2. */
53 
54   /* Change the type of B to MATSEQAIJ. */
55   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
56 
57   *newmat = B;
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
63 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
67 
68   PetscFunctionBegin;
69   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
70   mkl_sparse_destroy(aijmkl->csrA);
71   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
72 
73   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
74    * to destroy everything that remains. */
75   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
76   /* Note that I don't call MatSetType().  I believe this is because that
77    * is only to be called when *building* a matrix.  I could be wrong, but
78    * that is how things work for the SuperLU matrix class. */
79   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
80   PetscFunctionReturn(0);
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
85 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
86 {
87   PetscErrorCode ierr;
88   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
89   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
90 
91   PetscFunctionBegin;
92   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
93   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
99 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
100 {
101   PetscErrorCode  ierr;
102   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
103   Mat_SeqAIJMKL   *aijmkl;
104 
105   MatScalar       *aa;
106   PetscInt        n;
107   PetscInt        *aj,*ai;
108   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
109   PetscBool       set;
110 
111   PetscFunctionBegin;
112   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
113 
114   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
115    * extra information and some different methods, call the AssemblyEnd
116    * routine for a MATSEQAIJ.
117    * I'm not sure if this is the best way to do this, but it avoids
118    * a lot of code duplication.
119    * I also note that currently MATSEQAIJMKL doesn't know anything about
120    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
121    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
122    * this, this may break things.  (Don't know... haven't looked at it.
123    * Do I need to disable this somehow?) */
124   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
125   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
126 
127   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
128 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
129   if (!aijmkl->no_SpMV2) {
130     /* Now perform the SpMV2 setup and matrix optimization. */
131     aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
132     aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
133     aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
134     n = A->rmap->n;
135     aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
136     aa   = a->a;  /* Nonzero elements stored row-by-row. */
137     ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
138     stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa);
139     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
140     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
141     stat = mkl_sparse_optimize(aijmkl->csrA);
142     if (stat != SPARSE_STATUS_SUCCESS) {
143       PetscFunctionReturn(PETSC_ERR_LIB);
144     }
145   }
146 #endif
147 
148   PetscFunctionReturn(0);
149 }
150 
151 #undef __FUNCT__
152 #define __FUNCT__ "MatMult_SeqAIJMKL"
153 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
154 {
155   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
156   const PetscScalar *x;
157   PetscScalar       *y;
158   const MatScalar   *aa;
159   PetscErrorCode    ierr;
160   PetscInt          m=A->rmap->n;
161   const PetscInt    *aj,*ai;
162   PetscInt          i;
163 
164   /* Variables not in MatMult_SeqAIJ. */
165   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
166 
167   PetscFunctionBegin;
168   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
169   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
170   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
171   aa   = a->a;  /* Nonzero elements stored row-by-row. */
172   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
173 
174   /* Call MKL sparse BLAS routine to do the MatMult. */
175   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
176 
177   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
178   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
179   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
183 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
184 #undef __FUNCT__
185 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2"
186 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
187 {
188   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
189   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
190   const PetscScalar *x;
191   PetscScalar       *y;
192   const MatScalar   *aa;
193   PetscErrorCode    ierr;
194   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
195 
196   PetscFunctionBegin;
197 
198 #ifdef DEBUG
199   printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n");
200 #endif
201   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
202   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
203 
204   /* Call MKL SpMV2 executor routine to do the MatMult. */
205   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
206 
207   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
208   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
209   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
210   if (stat != SPARSE_STATUS_SUCCESS) {
211     PetscFunctionReturn(PETSC_ERR_LIB);
212   }
213   PetscFunctionReturn(0);
214 }
215 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
219 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
220 {
221   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
222   const PetscScalar *x;
223   PetscScalar       *y;
224   const MatScalar   *aa;
225   PetscErrorCode    ierr;
226   PetscInt          m=A->rmap->n;
227   const PetscInt    *aj,*ai;
228   PetscInt          i;
229 
230   /* Variables not in MatMultTranspose_SeqAIJ. */
231   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
235   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
236   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
237   aa   = a->a;  /* Nonzero elements stored row-by-row. */
238   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
239 
240   /* Call MKL sparse BLAS routine to do the MatMult. */
241   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
242 
243   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
244   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
245   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
250 #undef __FUNCT__
251 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2"
252 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
253 {
254   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
255   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
256   const PetscScalar *x;
257   PetscScalar       *y;
258   const MatScalar   *aa;
259   PetscErrorCode    ierr;
260   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
261 
262   PetscFunctionBegin;
263 
264 #ifdef DEBUG
265   printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n");
266 #endif
267   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
268   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
269 
270   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
271   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
272 
273   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
274   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
275   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
276   if (stat != SPARSE_STATUS_SUCCESS) {
277     PetscFunctionReturn(PETSC_ERR_LIB);
278   }
279   PetscFunctionReturn(0);
280 }
281 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
285 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
286 {
287   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
288   const PetscScalar *x;
289   PetscScalar       *y,*z;
290   const MatScalar   *aa;
291   PetscErrorCode    ierr;
292   PetscInt          m=A->rmap->n;
293   const PetscInt    *aj,*ai;
294   PetscInt          i;
295 
296   /* Variables not in MatMultAdd_SeqAIJ. */
297   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
298   PetscScalar       alpha = 1.0;
299   PetscScalar       beta = 1.0;
300   char              matdescra[6];
301 
302   PetscFunctionBegin;
303   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
304   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
305 
306   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
307   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
308   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
309   aa   = a->a;  /* Nonzero elements stored row-by-row. */
310   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
311 
312   /* Call MKL sparse BLAS routine to do the MatMult. */
313   if (zz == yy) {
314     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
315     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
316   } else {
317     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
318      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
319     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
320     for (i=0; i<m; i++) {
321       z[i] += y[i];
322     }
323   }
324 
325   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
326   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
327   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
332 #undef __FUNCT__
333 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2"
334 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
335 {
336   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
337   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
338   const PetscScalar *x;
339   PetscScalar       *y,*z;
340   const MatScalar   *aa;
341   PetscErrorCode    ierr;
342   PetscInt          m=A->rmap->n;
343   const PetscInt    *aj,*ai;
344   PetscInt          i;
345 
346   /* Variables not in MatMultAdd_SeqAIJ. */
347   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
348 
349   PetscFunctionBegin;
350 
351 #ifdef DEBUG
352   printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n");
353 #endif
354 
355   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
356   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
357   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
358   aa   = a->a;  /* Nonzero elements stored row-by-row. */
359   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
360 
361   /* Call MKL sparse BLAS routine to do the MatMult. */
362   if (zz == yy) {
363     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
364      * with alpha and beta both set to 1.0. */
365     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
366   } else {
367     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
368      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
369     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
370     for (i=0; i<m; i++) {
371       z[i] += y[i];
372     }
373   }
374 
375   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
376   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
377   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
378   if (stat != SPARSE_STATUS_SUCCESS) {
379     PetscFunctionReturn(PETSC_ERR_LIB);
380   }
381   PetscFunctionReturn(0);
382 }
383 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
384 
385 #undef __FUNCT__
386 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
387 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
388 {
389   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
390   const PetscScalar *x;
391   PetscScalar       *y,*z;
392   const MatScalar   *aa;
393   PetscErrorCode    ierr;
394   PetscInt          m=A->rmap->n;
395   const PetscInt    *aj,*ai;
396   PetscInt          i;
397 
398   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
399   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
400   PetscScalar       alpha = 1.0;
401   PetscScalar       beta = 1.0;
402   char              matdescra[6];
403 
404   PetscFunctionBegin;
405   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
406   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
407 
408   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
409   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
410   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
411   aa   = a->a;  /* Nonzero elements stored row-by-row. */
412   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
413 
414   /* Call MKL sparse BLAS routine to do the MatMult. */
415   if (zz == yy) {
416     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
417     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
418   } else {
419     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
420      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
421     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
422     for (i=0; i<m; i++) {
423       z[i] += y[i];
424     }
425   }
426 
427   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
428   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2"
436 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
439   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
440   const PetscScalar *x;
441   PetscScalar       *y,*z;
442   const MatScalar   *aa;
443   PetscErrorCode    ierr;
444   PetscInt          m=A->rmap->n;
445   const PetscInt    *aj,*ai;
446   PetscInt          i;
447 
448   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
449   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
450 
451   PetscFunctionBegin;
452 
453 #ifdef DEBUG
454   printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n");
455 #endif
456 
457   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
458   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
459   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
460   aa   = a->a;  /* Nonzero elements stored row-by-row. */
461   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
462 
463   /* Call MKL sparse BLAS routine to do the MatMult. */
464   if (zz == yy) {
465     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
466      * with alpha and beta both set to 1.0. */
467     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
468   } else {
469     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
470      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
471     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
472     for (i=0; i<m; i++) {
473       z[i] += y[i];
474     }
475   }
476 
477   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
478   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
479   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
480   if (stat != SPARSE_STATUS_SUCCESS) {
481     PetscFunctionReturn(PETSC_ERR_LIB);
482   }
483   PetscFunctionReturn(0);
484 }
485 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
486 
487 
488 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
489  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
490  * routine, but can also be used to convert an assembled SeqAIJ matrix
491  * into a SeqAIJMKL one. */
492 #undef __FUNCT__
493 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
494 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
495 {
496   PetscErrorCode ierr;
497   Mat            B = *newmat;
498   Mat_SeqAIJMKL *aijmkl;
499   PetscBool       set;
500 
501   PetscFunctionBegin;
502   if (reuse == MAT_INITIAL_MATRIX) {
503     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
504   }
505 
506   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
507   B->spptr = (void*) aijmkl;
508 
509   /* Set function pointers for methods that we inherit from AIJ but override.
510    * Currently the transposed operations are not being set because I encounter memory corruption
511    * when these are enabled.  Need to look at this with Valgrind or similar. --RTM */
512   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
513   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
514   B->ops->destroy          = MatDestroy_SeqAIJMKL;
515 
516   /* Parse command line options. */
517 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
518   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
519 #elif
520   aijmkl->no_SpMV2 = PETSC_TRUE;
521 #endif
522   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
523   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
524   ierr = PetscOptionsEnd();CHKERRQ(ierr);
525 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
526   if(!aijmkl->no_SpMV2) {
527     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
528     aijmkl->no_SpMV2 = PETSC_TRUE;
529   }
530 #endif
531 
532   if(!aijmkl->no_SpMV2) {
533 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
534     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
535     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2; */
536     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
537     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */
538 #endif
539   } else {
540     B->ops->mult             = MatMult_SeqAIJMKL;
541     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL; */
542     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
543     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */
544   }
545 
546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
547 
548   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
549   *newmat = B;
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatCreateSeqAIJMKL"
555 /*@C
556    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
557    This type inherits from AIJ and is largely identical, but uses sparse BLAS
558    routines from Intel MKL whenever possible.
559    Collective on MPI_Comm
560 
561    Input Parameters:
562 +  comm - MPI communicator, set to PETSC_COMM_SELF
563 .  m - number of rows
564 .  n - number of columns
565 .  nz - number of nonzeros per row (same for all rows)
566 -  nnz - array containing the number of nonzeros in the various rows
567          (possibly different for each row) or NULL
568 
569    Output Parameter:
570 .  A - the matrix
571 
572    Notes:
573    If nnz is given then nz is ignored
574 
575    Level: intermediate
576 
577 .keywords: matrix, cray, sparse, parallel
578 
579 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
580 @*/
581 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
582 {
583   PetscErrorCode ierr;
584 
585   PetscFunctionBegin;
586   ierr = MatCreate(comm,A);CHKERRQ(ierr);
587   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
588   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
589   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatCreate_SeqAIJMKL"
595 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
596 {
597   PetscErrorCode ierr;
598 
599   PetscFunctionBegin;
600   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
601   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604