xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision c5aa4e58151f233e02785795e703281edebc83f2)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
19   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
20   struct matrix_descr descr;
21 #endif
22 } Mat_SeqAIJMKL;
23 
24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
25 
26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
27 {
28   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
29   /* so we will ignore 'MatType type'. */
30   PetscErrorCode ierr;
31   Mat            B       = *newmat;
32   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
33 
34   PetscFunctionBegin;
35   if (reuse == MAT_INITIAL_MATRIX) {
36     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
37     aijmkl = (Mat_SeqAIJMKL*)B->spptr;
38   }
39 
40   /* Reset the original function pointers. */
41   B->ops->duplicate        = MatDuplicate_SeqAIJ;
42   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
43   B->ops->destroy          = MatDestroy_SeqAIJ;
44   B->ops->mult             = MatMult_SeqAIJ;
45   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
46   B->ops->multadd          = MatMultAdd_SeqAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
48 
49   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
50   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
51   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
52   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
53 
54   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
55    * simply involves destroying the MKL sparse matrix handle and then freeing
56    * the spptr pointer. */
57 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
58   if (aijmkl->sparse_optimized) {
59     sparse_status_t stat;
60     stat = mkl_sparse_destroy(aijmkl->csrA);
61     if (stat != SPARSE_STATUS_SUCCESS) {
62       PetscFunctionReturn(PETSC_ERR_LIB);
63     }
64   }
65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
66   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
67 
68   /* Change the type of B to MATSEQAIJ. */
69   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
70 
71   *newmat = B;
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
76 {
77   PetscErrorCode ierr;
78   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
79 
80   PetscFunctionBegin;
81 
82   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
83    * spptr pointer. */
84   if (aijmkl) {
85     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
86 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
87     if (aijmkl->sparse_optimized) {
88       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
89       stat = mkl_sparse_destroy(aijmkl->csrA);
90       if (stat != SPARSE_STATUS_SUCCESS) {
91         PetscFunctionReturn(PETSC_ERR_LIB);
92       }
93     }
94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
95     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
96   }
97 
98   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
99    * to destroy everything that remains. */
100   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
101   /* Note that I don't call MatSetType().  I believe this is because that
102    * is only to be called when *building* a matrix.  I could be wrong, but
103    * that is how things work for the SuperLU matrix class. */
104   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
109 {
110   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
111   Mat_SeqAIJMKL   *aijmkl;
112   PetscInt        m,n;
113   MatScalar       *aa;
114   PetscInt        *aj,*ai;
115 
116 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
117   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
118    * does nothing. We make it callable anyway in this case because it cuts
119    * down on littering the code with #ifdefs. */
120   PetscFunctionReturn(0);
121 #else
122 
123   sparse_status_t stat;
124 
125   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
126 
127   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
128 
129   if (aijmkl->sparse_optimized) {
130     /* Matrix has been previously assembled and optimized. Must destroy old
131      * matrix handle before running the optimization step again. */
132     stat = mkl_sparse_destroy(aijmkl->csrA);
133     if (stat != SPARSE_STATUS_SUCCESS) {
134       PetscFunctionReturn(PETSC_ERR_LIB);
135     }
136   }
137 
138   /* Now perform the SpMV2 setup and matrix optimization. */
139   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
140   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
141   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
142   m = A->rmap->n;
143   n = A->cmap->n;
144   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
145   aa   = a->a;  /* Nonzero elements stored row-by-row. */
146   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
147   if (n>0) {
148     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
149     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
150     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
151     stat = mkl_sparse_optimize(aijmkl->csrA);
152     if (stat != SPARSE_STATUS_SUCCESS) {
153       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
154       PetscFunctionReturn(PETSC_ERR_LIB);
155     }
156     aijmkl->sparse_optimized = PETSC_TRUE;
157   }
158 
159   PetscFunctionReturn(0);
160 #endif
161 }
162 
163 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
164 {
165   PetscErrorCode ierr;
166   Mat_SeqAIJMKL *aijmkl;
167   Mat_SeqAIJMKL *aijmkl_dest;
168 
169   PetscFunctionBegin;
170   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
171   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
172   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
173   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
174   aijmkl_dest->sparse_optimized = PETSC_FALSE;
175   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
180 {
181   PetscErrorCode  ierr;
182   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
183 
184   PetscFunctionBegin;
185   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
186 
187   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
188    * extra information and some different methods, call the AssemblyEnd
189    * routine for a MATSEQAIJ.
190    * I'm not sure if this is the best way to do this, but it avoids
191    * a lot of code duplication.
192    * I also note that currently MATSEQAIJMKL doesn't know anything about
193    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
194    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
195    * this, this may break things.  (Don't know... haven't looked at it.
196    * Do I need to disable this somehow?) */
197   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
198   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
199 
200   /* Now create the MKL sparse handle (if needed; the function checks). */
201   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
202 
203   PetscFunctionReturn(0);
204 }
205 
206 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
207 {
208   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
209   const PetscScalar *x;
210   PetscScalar       *y;
211   const MatScalar   *aa;
212   PetscErrorCode    ierr;
213   PetscInt          m=A->rmap->n;
214   PetscInt          n=A->cmap->n;
215   PetscScalar       alpha = 1.0;
216   PetscScalar       beta = 0.0;
217   const PetscInt    *aj,*ai;
218   char              matdescra[6];
219 
220 
221   /* Variables not in MatMult_SeqAIJ. */
222   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
223 
224   PetscFunctionBegin;
225   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
226   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
227   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
228   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
229   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
230   aa   = a->a;  /* Nonzero elements stored row-by-row. */
231   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
232 
233   /* Call MKL sparse BLAS routine to do the MatMult. */
234   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
235 
236   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
237   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
238   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
243 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
244 {
245   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
246   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
247   const PetscScalar *x;
248   PetscScalar       *y;
249   PetscErrorCode    ierr;
250   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
251 
252   PetscFunctionBegin;
253 
254   /* If there are no rows, this is a no-op: return immediately. */
255   if(A->cmap->n < 1) PetscFunctionReturn(0);
256 
257   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
258   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
259 
260   /* Call MKL SpMV2 executor routine to do the MatMult. */
261   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
262 
263   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
264   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
265   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
266   if (stat != SPARSE_STATUS_SUCCESS) {
267     PetscFunctionReturn(PETSC_ERR_LIB);
268   }
269   PetscFunctionReturn(0);
270 }
271 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
272 
273 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
274 {
275   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
276   const PetscScalar *x;
277   PetscScalar       *y;
278   const MatScalar   *aa;
279   PetscErrorCode    ierr;
280   PetscInt          m=A->rmap->n;
281   PetscInt          n=A->cmap->n;
282   PetscScalar       alpha = 1.0;
283   PetscScalar       beta = 0.0;
284   const PetscInt    *aj,*ai;
285   char              matdescra[6];
286 
287   /* Variables not in MatMultTranspose_SeqAIJ. */
288   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
289 
290   PetscFunctionBegin;
291   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
292   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
293   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
294   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
295   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
296   aa   = a->a;  /* Nonzero elements stored row-by-row. */
297   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
298 
299   /* Call MKL sparse BLAS routine to do the MatMult. */
300   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
301 
302   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
309 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
310 {
311   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
312   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
313   const PetscScalar *x;
314   PetscScalar       *y;
315   PetscErrorCode    ierr;
316   sparse_status_t   stat;
317 
318   PetscFunctionBegin;
319 
320   /* If there are no rows, this is a no-op: return immediately. */
321   if(A->cmap->n < 1) PetscFunctionReturn(0);
322 
323   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
324   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
325 
326   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
327   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
328 
329   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
330   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
331   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
332   if (stat != SPARSE_STATUS_SUCCESS) {
333     PetscFunctionReturn(PETSC_ERR_LIB);
334   }
335   PetscFunctionReturn(0);
336 }
337 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
338 
339 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
340 {
341   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
342   const PetscScalar *x;
343   PetscScalar       *y,*z;
344   const MatScalar   *aa;
345   PetscErrorCode    ierr;
346   PetscInt          m=A->rmap->n;
347   PetscInt          n=A->cmap->n;
348   const PetscInt    *aj,*ai;
349   PetscInt          i;
350 
351   /* Variables not in MatMultAdd_SeqAIJ. */
352   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
353   PetscScalar       alpha = 1.0;
354   PetscScalar       beta;
355   char              matdescra[6];
356 
357   PetscFunctionBegin;
358   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
359   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
360 
361   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
362   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
363   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
364   aa   = a->a;  /* Nonzero elements stored row-by-row. */
365   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
366 
367   /* Call MKL sparse BLAS routine to do the MatMult. */
368   if (zz == yy) {
369     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
370     beta = 1.0;
371     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
372   } else {
373     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
374      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
375     beta = 0.0;
376     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
377     for (i=0; i<m; i++) {
378       z[i] += y[i];
379     }
380   }
381 
382   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
383   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
384   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
389 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
390 {
391   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
392   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
393   const PetscScalar *x;
394   PetscScalar       *y,*z;
395   PetscErrorCode    ierr;
396   PetscInt          m=A->rmap->n;
397   PetscInt          i;
398 
399   /* Variables not in MatMultAdd_SeqAIJ. */
400   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
401 
402   PetscFunctionBegin;
403 
404   /* If there are no rows, this is a no-op: return immediately. */
405   if(A->cmap->n < 1) PetscFunctionReturn(0);
406 
407   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
408   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
409 
410   /* Call MKL sparse BLAS routine to do the MatMult. */
411   if (zz == yy) {
412     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
413      * with alpha and beta both set to 1.0. */
414     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
415   } else {
416     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
417      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
418     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
419     for (i=0; i<m; i++) {
420       z[i] += y[i];
421     }
422   }
423 
424   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
425   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
426   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
427   if (stat != SPARSE_STATUS_SUCCESS) {
428     PetscFunctionReturn(PETSC_ERR_LIB);
429   }
430   PetscFunctionReturn(0);
431 }
432 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
433 
434 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
435 {
436   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
437   const PetscScalar *x;
438   PetscScalar       *y,*z;
439   const MatScalar   *aa;
440   PetscErrorCode    ierr;
441   PetscInt          m=A->rmap->n;
442   PetscInt          n=A->cmap->n;
443   const PetscInt    *aj,*ai;
444   PetscInt          i;
445 
446   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
447   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
448   PetscScalar       alpha = 1.0;
449   PetscScalar       beta;
450   char              matdescra[6];
451 
452   PetscFunctionBegin;
453   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
454   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
455 
456   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
457   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
458   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
459   aa   = a->a;  /* Nonzero elements stored row-by-row. */
460   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
461 
462   /* Call MKL sparse BLAS routine to do the MatMult. */
463   if (zz == yy) {
464     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
465     beta = 1.0;
466     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
467   } else {
468     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
469      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
470     beta = 0.0;
471     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
472     for (i=0; i<n; i++) {
473       z[i] += y[i];
474     }
475   }
476 
477   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
478   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
479   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
484 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
487   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
488   const PetscScalar *x;
489   PetscScalar       *y,*z;
490   PetscErrorCode    ierr;
491   PetscInt          n=A->cmap->n;
492   PetscInt          i;
493 
494   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
495   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
496 
497   PetscFunctionBegin;
498 
499   /* If there are no rows, this is a no-op: return immediately. */
500   if(A->cmap->n < 1) PetscFunctionReturn(0);
501 
502   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
503   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
504 
505   /* Call MKL sparse BLAS routine to do the MatMult. */
506   if (zz == yy) {
507     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
508      * with alpha and beta both set to 1.0. */
509     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
510   } else {
511     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
512      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
513     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
514     for (i=0; i<n; i++) {
515       z[i] += y[i];
516     }
517   }
518 
519   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
520   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
521   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
522   if (stat != SPARSE_STATUS_SUCCESS) {
523     PetscFunctionReturn(PETSC_ERR_LIB);
524   }
525   PetscFunctionReturn(0);
526 }
527 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
528 
529 PETSC_INTERN PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
530 {
531   PetscErrorCode ierr;
532 
533   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
534   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
535   PetscFunctionReturn(0);
536 }
537 
538 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
539  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
540  * routine, but can also be used to convert an assembled SeqAIJ matrix
541  * into a SeqAIJMKL one. */
542 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
543 {
544   PetscErrorCode ierr;
545   Mat            B = *newmat;
546   Mat_SeqAIJMKL  *aijmkl;
547   PetscBool      set;
548   PetscBool      sametype;
549 
550   PetscFunctionBegin;
551   if (reuse == MAT_INITIAL_MATRIX) {
552     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
553   }
554 
555   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
556   if (sametype) PetscFunctionReturn(0);
557 
558   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
559   B->spptr = (void*) aijmkl;
560 
561   /* Set function pointers for methods that we inherit from AIJ but override.
562    * We also parse some command line options below, since those determine some of the methods we point to. */
563   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
564   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
565   B->ops->destroy          = MatDestroy_SeqAIJMKL;
566 
567   aijmkl->sparse_optimized = PETSC_FALSE;
568 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
569   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
570 #elif
571   aijmkl->no_SpMV2 = PETSC_TRUE;
572 #endif
573 
574   /* Parse command line options. */
575   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
576   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
577   ierr = PetscOptionsEnd();CHKERRQ(ierr);
578 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
579   if(!aijmkl->no_SpMV2) {
580     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
581     aijmkl->no_SpMV2 = PETSC_TRUE;
582   }
583 #endif
584 
585   if(!aijmkl->no_SpMV2) {
586 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
587     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
588     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
589     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
590     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
591 #endif
592   } else {
593     B->ops->mult             = MatMult_SeqAIJMKL;
594     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
595     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
596     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
597   }
598 
599   B->ops->scale = MatScale_SeqAIJMKL;
600 
601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
606 
607   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
608   *newmat = B;
609   PetscFunctionReturn(0);
610 }
611 
612 /*@C
613    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
614    This type inherits from AIJ and is largely identical, but uses sparse BLAS
615    routines from Intel MKL whenever possible.
616    Collective on MPI_Comm
617 
618    Input Parameters:
619 +  comm - MPI communicator, set to PETSC_COMM_SELF
620 .  m - number of rows
621 .  n - number of columns
622 .  nz - number of nonzeros per row (same for all rows)
623 -  nnz - array containing the number of nonzeros in the various rows
624          (possibly different for each row) or NULL
625 
626    Output Parameter:
627 .  A - the matrix
628 
629    Notes:
630    If nnz is given then nz is ignored
631 
632    Level: intermediate
633 
634 .keywords: matrix, cray, sparse, parallel
635 
636 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
637 @*/
638 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
639 {
640   PetscErrorCode ierr;
641 
642   PetscFunctionBegin;
643   ierr = MatCreate(comm,A);CHKERRQ(ierr);
644   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
645   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
646   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
651 {
652   PetscErrorCode ierr;
653 
654   PetscFunctionBegin;
655   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
656   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659