xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision f68ad4bd894d84a91e3d5d4616b62490efdc9195)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
19   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
20   struct matrix_descr descr;
21 #endif
22 } Mat_SeqAIJMKL;
23 
24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29 {
30   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
31   /* so we will ignore 'MatType type'. */
32   PetscErrorCode ierr;
33   Mat            B       = *newmat;
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) {
38     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
39     aijmkl = (Mat_SeqAIJMKL*)B->spptr;
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50 
51   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
52   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
53   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
54   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
55 
56   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
57    * simply involves destroying the MKL sparse matrix handle and then freeing
58    * the spptr pointer. */
59 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
60   if (aijmkl->sparse_optimized) {
61     sparse_status_t stat;
62     stat = mkl_sparse_destroy(aijmkl->csrA);
63     if (stat != SPARSE_STATUS_SUCCESS) {
64       PetscFunctionReturn(PETSC_ERR_LIB);
65     }
66   }
67 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
68   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
69 
70   /* Change the type of B to MATSEQAIJ. */
71   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
72 
73   *newmat = B;
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
79 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
80 {
81   PetscErrorCode ierr;
82   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
83 
84   PetscFunctionBegin;
85 
86   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
87    * spptr pointer. */
88   if (aijmkl) {
89     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
90 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
91     if (aijmkl->sparse_optimized) {
92       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
93       stat = mkl_sparse_destroy(aijmkl->csrA);
94       if (stat != SPARSE_STATUS_SUCCESS) {
95         PetscFunctionReturn(PETSC_ERR_LIB);
96       }
97     }
98 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
99     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
100   }
101 
102   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
103    * to destroy everything that remains. */
104   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
105   /* Note that I don't call MatSetType().  I believe this is because that
106    * is only to be called when *building* a matrix.  I could be wrong, but
107    * that is how things work for the SuperLU matrix class. */
108   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
109   PetscFunctionReturn(0);
110 }
111 
112 #undef __FUNCT__
113 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
114 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
115 {
116   PetscErrorCode ierr;
117   Mat_SeqAIJMKL *aijmkl;
118   Mat_SeqAIJMKL *aijmkl_dest;
119 
120   PetscFunctionBegin;
121   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
122   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
123   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
124   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
125   aijmkl_dest->sparse_optimized = PETSC_FALSE;
126 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
127   aijmkl_dest->csrA = NULL;
128   if (!aijmkl->no_SpMV2) {
129     sparse_status_t stat;
130     stat = mkl_sparse_copy(aijmkl->csrA,aijmkl->descr,&aijmkl_dest->csrA);
131     if (stat != SPARSE_STATUS_SUCCESS) {
132       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_copy");
133       PetscFunctionReturn(PETSC_ERR_LIB);
134     }
135     stat = mkl_sparse_optimize(aijmkl_dest->csrA);
136     if (stat != SPARSE_STATUS_SUCCESS) {
137       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
138       PetscFunctionReturn(PETSC_ERR_LIB);
139     }
140     aijmkl_dest->sparse_optimized = PETSC_TRUE;
141   }
142 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
143   PetscFunctionReturn(0);
144 }
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
148 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
149 {
150   PetscErrorCode  ierr;
151   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
152   Mat_SeqAIJMKL   *aijmkl;
153 
154   MatScalar       *aa;
155   PetscInt        n;
156   PetscInt        *aj,*ai;
157 
158   PetscFunctionBegin;
159   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
160 
161   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
162    * extra information and some different methods, call the AssemblyEnd
163    * routine for a MATSEQAIJ.
164    * I'm not sure if this is the best way to do this, but it avoids
165    * a lot of code duplication.
166    * I also note that currently MATSEQAIJMKL doesn't know anything about
167    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
168    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
169    * this, this may break things.  (Don't know... haven't looked at it.
170    * Do I need to disable this somehow?) */
171   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
172   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
173 
174   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
175 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
176   if (!aijmkl->no_SpMV2) {
177     sparse_status_t stat;
178     if (aijmkl->sparse_optimized) {
179       /* Matrix has been previously assembled and optimized. Must destroy old
180        * matrix handle before running the optimization step again. */
181       sparse_status_t stat;
182       stat = mkl_sparse_destroy(aijmkl->csrA);
183       if (stat != SPARSE_STATUS_SUCCESS) {
184         PetscFunctionReturn(PETSC_ERR_LIB);
185       }
186     }
187     /* Now perform the SpMV2 setup and matrix optimization. */
188     aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
189     aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
190     aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
191     n = A->rmap->n;
192     aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
193     aa   = a->a;  /* Nonzero elements stored row-by-row. */
194     ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
195     stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa);
196     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
197     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
198     stat = mkl_sparse_optimize(aijmkl->csrA);
199     if (stat != SPARSE_STATUS_SUCCESS) {
200       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
201       PetscFunctionReturn(PETSC_ERR_LIB);
202     }
203     aijmkl->sparse_optimized = PETSC_TRUE;
204   }
205 #endif
206 
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "MatMult_SeqAIJMKL"
212 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
213 {
214   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
215   const PetscScalar *x;
216   PetscScalar       *y;
217   const MatScalar   *aa;
218   PetscErrorCode    ierr;
219   PetscInt          m=A->rmap->n;
220   const PetscInt    *aj,*ai;
221 
222   /* Variables not in MatMult_SeqAIJ. */
223   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
224 
225   PetscFunctionBegin;
226   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
227   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
228   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
229   aa   = a->a;  /* Nonzero elements stored row-by-row. */
230   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
231 
232   /* Call MKL sparse BLAS routine to do the MatMult. */
233   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
234 
235   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
236   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
237   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
241 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
242 #undef __FUNCT__
243 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2"
244 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
245 {
246   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
247   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
248   const PetscScalar *x;
249   PetscScalar       *y;
250   PetscErrorCode    ierr;
251   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
252 
253   PetscFunctionBegin;
254 
255   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
256   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
257 
258   /* Call MKL SpMV2 executor routine to do the MatMult. */
259   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
260 
261   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
262   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
263   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
264   if (stat != SPARSE_STATUS_SUCCESS) {
265     PetscFunctionReturn(PETSC_ERR_LIB);
266   }
267   PetscFunctionReturn(0);
268 }
269 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
273 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
274 {
275   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
276   const PetscScalar *x;
277   PetscScalar       *y;
278   const MatScalar   *aa;
279   PetscErrorCode    ierr;
280   PetscInt          m=A->rmap->n;
281   const PetscInt    *aj,*ai;
282 
283   /* Variables not in MatMultTranspose_SeqAIJ. */
284   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
285 
286   PetscFunctionBegin;
287   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
288   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
289   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
290   aa   = a->a;  /* Nonzero elements stored row-by-row. */
291   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
292 
293   /* Call MKL sparse BLAS routine to do the MatMult. */
294   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
295 
296   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
297   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
298   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
303 #undef __FUNCT__
304 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2"
305 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
306 {
307   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
308   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
309   const PetscScalar *x;
310   PetscScalar       *y;
311   PetscErrorCode    ierr;
312   sparse_status_t   stat;
313 
314   PetscFunctionBegin;
315 
316   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
318 
319   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
320   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
321 
322   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
323   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
325   if (stat != SPARSE_STATUS_SUCCESS) {
326     PetscFunctionReturn(PETSC_ERR_LIB);
327   }
328   PetscFunctionReturn(0);
329 }
330 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
334 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
335 {
336   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
337   const PetscScalar *x;
338   PetscScalar       *y,*z;
339   const MatScalar   *aa;
340   PetscErrorCode    ierr;
341   PetscInt          m=A->rmap->n;
342   const PetscInt    *aj,*ai;
343   PetscInt          i;
344 
345   /* Variables not in MatMultAdd_SeqAIJ. */
346   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
347   PetscScalar       alpha = 1.0;
348   PetscScalar       beta = 1.0;
349   char              matdescra[6];
350 
351   PetscFunctionBegin;
352   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
353   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
354 
355   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
356   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
357   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
358   aa   = a->a;  /* Nonzero elements stored row-by-row. */
359   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
360 
361   /* Call MKL sparse BLAS routine to do the MatMult. */
362   if (zz == yy) {
363     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
364     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
365   } else {
366     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
367      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
368     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
369     for (i=0; i<m; i++) {
370       z[i] += y[i];
371     }
372   }
373 
374   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
375   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
376   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379 
380 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
381 #undef __FUNCT__
382 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2"
383 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
384 {
385   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
386   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
387   const PetscScalar *x;
388   PetscScalar       *y,*z;
389   PetscErrorCode    ierr;
390   PetscInt          m=A->rmap->n;
391   PetscInt          i;
392 
393   /* Variables not in MatMultAdd_SeqAIJ. */
394   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
395 
396   PetscFunctionBegin;
397 
398 
399   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
400   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
401 
402   /* Call MKL sparse BLAS routine to do the MatMult. */
403   if (zz == yy) {
404     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
405      * with alpha and beta both set to 1.0. */
406     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
407   } else {
408     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
409      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
410     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
411     for (i=0; i<m; i++) {
412       z[i] += y[i];
413     }
414   }
415 
416   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
417   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
418   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
419   if (stat != SPARSE_STATUS_SUCCESS) {
420     PetscFunctionReturn(PETSC_ERR_LIB);
421   }
422   PetscFunctionReturn(0);
423 }
424 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
428 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
429 {
430   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
431   const PetscScalar *x;
432   PetscScalar       *y,*z;
433   const MatScalar   *aa;
434   PetscErrorCode    ierr;
435   PetscInt          m=A->rmap->n;
436   const PetscInt    *aj,*ai;
437   PetscInt          i;
438 
439   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
440   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
441   PetscScalar       alpha = 1.0;
442   PetscScalar       beta = 1.0;
443   char              matdescra[6];
444 
445   PetscFunctionBegin;
446   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
447   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
448 
449   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
450   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
451   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
452   aa   = a->a;  /* Nonzero elements stored row-by-row. */
453   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
454 
455   /* Call MKL sparse BLAS routine to do the MatMult. */
456   if (zz == yy) {
457     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
458     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
459   } else {
460     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
461      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
462     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
463     for (i=0; i<m; i++) {
464       z[i] += y[i];
465     }
466   }
467 
468   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
469   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
470   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
475 #undef __FUNCT__
476 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2"
477 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
478 {
479   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
480   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
481   const PetscScalar *x;
482   PetscScalar       *y,*z;
483   PetscErrorCode    ierr;
484   PetscInt          m=A->rmap->n;
485   PetscInt          i;
486 
487   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
488   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
489 
490   PetscFunctionBegin;
491 
492   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
493   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
494 
495   /* Call MKL sparse BLAS routine to do the MatMult. */
496   if (zz == yy) {
497     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
498      * with alpha and beta both set to 1.0. */
499     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
500   } else {
501     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
502      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
503     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
504     for (i=0; i<m; i++) {
505       z[i] += y[i];
506     }
507   }
508 
509   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
510   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
511   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
512   if (stat != SPARSE_STATUS_SUCCESS) {
513     PetscFunctionReturn(PETSC_ERR_LIB);
514   }
515   PetscFunctionReturn(0);
516 }
517 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
518 
519 
520 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
521  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
522  * routine, but can also be used to convert an assembled SeqAIJ matrix
523  * into a SeqAIJMKL one. */
524 #undef __FUNCT__
525 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
526 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
527 {
528   PetscErrorCode ierr;
529   Mat            B = *newmat;
530   Mat_SeqAIJMKL  *aijmkl;
531   PetscBool      set;
532   PetscBool      sametype;
533 
534   PetscFunctionBegin;
535   if (reuse == MAT_INITIAL_MATRIX) {
536     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
537   }
538 
539   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
540   if (sametype) PetscFunctionReturn(0);
541 
542   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
543   B->spptr = (void*) aijmkl;
544 
545   /* Set function pointers for methods that we inherit from AIJ but override.
546    * We also parse some command line options below, since those determine some of the methods we point to.
547    * Note: Currently the transposed operations are not being set because I encounter memory corruption
548    * when these are enabled.  Need to look at this with Valgrind or similar. --RTM */
549   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
550   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
551   B->ops->destroy          = MatDestroy_SeqAIJMKL;
552 
553   aijmkl->sparse_optimized = PETSC_FALSE;
554 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
555   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
556 #elif
557   aijmkl->no_SpMV2 = PETSC_TRUE;
558 #endif
559 
560   /* Parse command line options. */
561   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
562   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
563   ierr = PetscOptionsEnd();CHKERRQ(ierr);
564 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
565   if(!aijmkl->no_SpMV2) {
566     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
567     aijmkl->no_SpMV2 = PETSC_TRUE;
568   }
569 #endif
570 
571   if(!aijmkl->no_SpMV2) {
572 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
573     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
574     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2; */
575     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
576     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */
577 #endif
578   } else {
579     B->ops->mult             = MatMult_SeqAIJMKL;
580     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL; */
581     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
582     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */
583   }
584 
585   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
586   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
587   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
589 
590   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
591   *newmat = B;
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatCreateSeqAIJMKL"
597 /*@C
598    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
599    This type inherits from AIJ and is largely identical, but uses sparse BLAS
600    routines from Intel MKL whenever possible.
601    Collective on MPI_Comm
602 
603    Input Parameters:
604 +  comm - MPI communicator, set to PETSC_COMM_SELF
605 .  m - number of rows
606 .  n - number of columns
607 .  nz - number of nonzeros per row (same for all rows)
608 -  nnz - array containing the number of nonzeros in the various rows
609          (possibly different for each row) or NULL
610 
611    Output Parameter:
612 .  A - the matrix
613 
614    Notes:
615    If nnz is given then nz is ignored
616 
617    Level: intermediate
618 
619 .keywords: matrix, cray, sparse, parallel
620 
621 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
622 @*/
623 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
624 {
625   PetscErrorCode ierr;
626 
627   PetscFunctionBegin;
628   ierr = MatCreate(comm,A);CHKERRQ(ierr);
629   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
630   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
631   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
632   PetscFunctionReturn(0);
633 }
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "MatCreate_SeqAIJMKL"
637 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
638 {
639   PetscErrorCode ierr;
640 
641   PetscFunctionBegin;
642   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
643   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646