xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision e8a2a7c33427d47ed96daa6bcd483a005df1dcb7)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
18   struct matrix_descr descr;
19 } Mat_SeqAIJMKL;
20 
21 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
25 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
26 {
27   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
28   /* so we will ignore 'MatType type'. */
29   PetscErrorCode ierr;
30   Mat            B       = *newmat;
31   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
32 
33   PetscFunctionBegin;
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
36   }
37 
38   /* Reset the original function pointers. */
39   B->ops->duplicate        = MatDuplicate_SeqAIJ;
40   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
41   B->ops->destroy          = MatDestroy_SeqAIJ;
42   B->ops->mult             = MatMult_SeqAIJ;
43   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
44   B->ops->multadd          = MatMultAdd_SeqAIJ;
45   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
46 
47   /* Free everything in the Mat_SeqAIJMKL data structure.
48    * We don't free the Mat_SeqAIJMKL struct itself, as this will
49    * cause problems later when MatDestroy() tries to free it. */
50   /* Actually there is nothing to do here right now.
51    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
52    * see if there is some way to clean up the "handle" used by SpMV2. */
53 
54   /* Change the type of B to MATSEQAIJ. */
55   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
56 
57   *newmat = B;
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
63 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
64 {
65   PetscErrorCode ierr;
66   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
67 
68   PetscFunctionBegin;
69   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
70   mkl_sparse_destroy(aijmkl->csrA);
71   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
72 
73   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
74    * to destroy everything that remains. */
75   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
76   /* Note that I don't call MatSetType().  I believe this is because that
77    * is only to be called when *building* a matrix.  I could be wrong, but
78    * that is how things work for the SuperLU matrix class. */
79   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
80   PetscFunctionReturn(0);
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
85 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
86 {
87   PetscErrorCode ierr;
88   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
89   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
90 
91   PetscFunctionBegin;
92   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
93   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
99 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
100 {
101   PetscErrorCode  ierr;
102   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
103   Mat_SeqAIJMKL   *aijmkl;
104 
105   MatScalar       *aa;
106   PetscInt        n;
107   PetscInt        *aj,*ai;
108   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
109   PetscBool       set;
110 
111   PetscFunctionBegin;
112   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
113 
114   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
115    * extra information and some different methods, call the AssemblyEnd
116    * routine for a MATSEQAIJ.
117    * I'm not sure if this is the best way to do this, but it avoids
118    * a lot of code duplication.
119    * I also note that currently MATSEQAIJMKL doesn't know anything about
120    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
121    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
122    * this, this may break things.  (Don't know... haven't looked at it.
123    * Do I need to disable this somehow?) */
124   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
125   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
126 
127   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
128   if (!aijmkl->no_SpMV2) {
129     /* Now perform the SpMV2 setup and matrix optimization. */
130     aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
131     aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
132     aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
133     n = A->rmap->n;
134     aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
135     aa   = a->a;  /* Nonzero elements stored row-by-row. */
136     ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
137     stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa);
138     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
139     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
140     stat = mkl_sparse_optimize(aijmkl->csrA);
141     if (stat != SPARSE_STATUS_SUCCESS) {
142       PetscFunctionReturn(PETSC_ERR_LIB);
143     }
144   }
145 
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_SeqAIJMKL"
151 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
152 {
153   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
154   const PetscScalar *x;
155   PetscScalar       *y;
156   const MatScalar   *aa;
157   PetscErrorCode    ierr;
158   PetscInt          m=A->rmap->n;
159   const PetscInt    *aj,*ai;
160   PetscInt          i;
161 
162   /* Variables not in MatMult_SeqAIJ. */
163   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
164 
165   PetscFunctionBegin;
166   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
167   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
168   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
169   aa   = a->a;  /* Nonzero elements stored row-by-row. */
170   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
171 
172   /* Call MKL sparse BLAS routine to do the MatMult. */
173   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
174 
175   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
176   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
177   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2"
183 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
184 {
185   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
186   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
187   const PetscScalar *x;
188   PetscScalar       *y;
189   const MatScalar   *aa;
190   PetscErrorCode    ierr;
191   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
192 
193   PetscFunctionBegin;
194 
195 #ifdef DEBUG
196   printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n");
197 #endif
198   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
199   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
200 
201   /* Call MKL SpMV2 executor routine to do the MatMult. */
202   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
203 
204   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
205   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
206   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
207   if (stat != SPARSE_STATUS_SUCCESS) {
208     PetscFunctionReturn(PETSC_ERR_LIB);
209   }
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
215 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
216 {
217   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
218   const PetscScalar *x;
219   PetscScalar       *y;
220   const MatScalar   *aa;
221   PetscErrorCode    ierr;
222   PetscInt          m=A->rmap->n;
223   const PetscInt    *aj,*ai;
224   PetscInt          i;
225 
226   /* Variables not in MatMultTranspose_SeqAIJ. */
227   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
228 
229   PetscFunctionBegin;
230   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
231   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
232   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
233   aa   = a->a;  /* Nonzero elements stored row-by-row. */
234   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
235 
236   /* Call MKL sparse BLAS routine to do the MatMult. */
237   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
238 
239   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
240   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
241   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2"
247 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
248 {
249   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
250   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
251   const PetscScalar *x;
252   PetscScalar       *y;
253   const MatScalar   *aa;
254   PetscErrorCode    ierr;
255   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
256 
257   PetscFunctionBegin;
258 
259 #ifdef DEBUG
260   printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n");
261 #endif
262   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
263   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
264 
265   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
266   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
267 
268   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
269   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
270   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
271   if (stat != SPARSE_STATUS_SUCCESS) {
272     PetscFunctionReturn(PETSC_ERR_LIB);
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
279 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
280 {
281   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
282   const PetscScalar *x;
283   PetscScalar       *y,*z;
284   const MatScalar   *aa;
285   PetscErrorCode    ierr;
286   PetscInt          m=A->rmap->n;
287   const PetscInt    *aj,*ai;
288   PetscInt          i;
289 
290   /* Variables not in MatMultAdd_SeqAIJ. */
291   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
292   PetscScalar       alpha = 1.0;
293   PetscScalar       beta = 1.0;
294   char              matdescra[6];
295 
296   PetscFunctionBegin;
297   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
298   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
299 
300   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
301   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
302   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
303   aa   = a->a;  /* Nonzero elements stored row-by-row. */
304   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
305 
306   /* Call MKL sparse BLAS routine to do the MatMult. */
307   if (zz == yy) {
308     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
309     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
310   } else {
311     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
312      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
313     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
314     for (i=0; i<m; i++) {
315       z[i] += y[i];
316     }
317   }
318 
319   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
320   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
321   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2"
327 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
328 {
329   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
330   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
331   const PetscScalar *x;
332   PetscScalar       *y,*z;
333   const MatScalar   *aa;
334   PetscErrorCode    ierr;
335   PetscInt          m=A->rmap->n;
336   const PetscInt    *aj,*ai;
337   PetscInt          i;
338 
339   /* Variables not in MatMultAdd_SeqAIJ. */
340   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
341 
342   PetscFunctionBegin;
343 
344 #ifdef DEBUG
345   printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n");
346 #endif
347 
348   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
349   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
350   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
351   aa   = a->a;  /* Nonzero elements stored row-by-row. */
352   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
353 
354   /* Call MKL sparse BLAS routine to do the MatMult. */
355   if (zz == yy) {
356     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
357      * with alpha and beta both set to 1.0. */
358     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
359   } else {
360     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
361      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
362     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
363     for (i=0; i<m; i++) {
364       z[i] += y[i];
365     }
366   }
367 
368   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
369   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
370   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
371   if (stat != SPARSE_STATUS_SUCCESS) {
372     PetscFunctionReturn(PETSC_ERR_LIB);
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
379 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
380 {
381   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
382   const PetscScalar *x;
383   PetscScalar       *y,*z;
384   const MatScalar   *aa;
385   PetscErrorCode    ierr;
386   PetscInt          m=A->rmap->n;
387   const PetscInt    *aj,*ai;
388   PetscInt          i;
389 
390   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
391   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
392   PetscScalar       alpha = 1.0;
393   PetscScalar       beta = 1.0;
394   char              matdescra[6];
395 
396   PetscFunctionBegin;
397   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
398   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
399 
400   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
401   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
402   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
403   aa   = a->a;  /* Nonzero elements stored row-by-row. */
404   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
405 
406   /* Call MKL sparse BLAS routine to do the MatMult. */
407   if (zz == yy) {
408     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
409     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
410   } else {
411     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
412      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
413     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
414     for (i=0; i<m; i++) {
415       z[i] += y[i];
416     }
417   }
418 
419   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
420   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
421   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2"
427 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
428 {
429   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
430   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
431   const PetscScalar *x;
432   PetscScalar       *y,*z;
433   const MatScalar   *aa;
434   PetscErrorCode    ierr;
435   PetscInt          m=A->rmap->n;
436   const PetscInt    *aj,*ai;
437   PetscInt          i;
438 
439   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
440   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
441 
442   PetscFunctionBegin;
443 
444 #ifdef DEBUG
445   printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n");
446 #endif
447 
448   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
449   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
450   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
451   aa   = a->a;  /* Nonzero elements stored row-by-row. */
452   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
453 
454   /* Call MKL sparse BLAS routine to do the MatMult. */
455   if (zz == yy) {
456     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
457      * with alpha and beta both set to 1.0. */
458     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
459   } else {
460     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
461      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
462     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
463     for (i=0; i<m; i++) {
464       z[i] += y[i];
465     }
466   }
467 
468   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
469   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
470   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
471   if (stat != SPARSE_STATUS_SUCCESS) {
472     PetscFunctionReturn(PETSC_ERR_LIB);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 
478 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
479  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
480  * routine, but can also be used to convert an assembled SeqAIJ matrix
481  * into a SeqAIJMKL one. */
482 #undef __FUNCT__
483 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
484 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
485 {
486   PetscErrorCode ierr;
487   Mat            B = *newmat;
488   Mat_SeqAIJMKL *aijmkl;
489   PetscBool       set;
490 
491   PetscFunctionBegin;
492   if (reuse == MAT_INITIAL_MATRIX) {
493     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
494   }
495 
496   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
497   B->spptr = (void*) aijmkl;
498 
499   /* Set function pointers for methods that we inherit from AIJ but override.
500    * Currently the transposed operations are not being set because I encounter memory corruption
501    * when these are enabled.  Need to look at this with Valgrind or similar. --RTM */
502   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
503   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
504   B->ops->destroy          = MatDestroy_SeqAIJMKL;
505 
506   /* Parse command line options. */
507   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines. */
508   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
509   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
510   ierr = PetscOptionsEnd();CHKERRQ(ierr);
511 
512   if(!aijmkl->no_SpMV2) {
513     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
514     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2; */
515     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
516     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */
517   } else {
518     B->ops->mult             = MatMult_SeqAIJMKL;
519     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL; */
520     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
521     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */
522   }
523 
524   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
525 
526   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
527   *newmat = B;
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatCreateSeqAIJMKL"
533 /*@C
534    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
535    This type inherits from AIJ and is largely identical, but uses sparse BLAS
536    routines from Intel MKL whenever possible.
537    Collective on MPI_Comm
538 
539    Input Parameters:
540 +  comm - MPI communicator, set to PETSC_COMM_SELF
541 .  m - number of rows
542 .  n - number of columns
543 .  nz - number of nonzeros per row (same for all rows)
544 -  nnz - array containing the number of nonzeros in the various rows
545          (possibly different for each row) or NULL
546 
547    Output Parameter:
548 .  A - the matrix
549 
550    Notes:
551    If nnz is given then nz is ignored
552 
553    Level: intermediate
554 
555 .keywords: matrix, cray, sparse, parallel
556 
557 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
558 @*/
559 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
560 {
561   PetscErrorCode ierr;
562 
563   PetscFunctionBegin;
564   ierr = MatCreate(comm,A);CHKERRQ(ierr);
565   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
566   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
567   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatCreate_SeqAIJMKL"
573 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
574 {
575   PetscErrorCode ierr;
576 
577   PetscFunctionBegin;
578   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
579   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582