xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision df555b7191f89ca506518fb67cedc13d88de444b)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 #define USE_MKL_SPMV2 1
13 /* TODO: Eventually fix the above--I shouldn't hard code things like this!
14  * Use of MKL SpMV2 should eventually be determined at configure time, run time, or
15  * it should just always be used -- not sure what makes sense yet! --RTM */
16 
17 /* MKL include files. */
18 #include <mkl_spblas.h>  /* Sparse BLAS */
19 
20 typedef struct {
21   PetscBool use_SpMV2;  /* If PETSC_TRUE, then use the MKL SpMV2 inspector-executor routines. */
22   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23   struct matrix_descr descr;
24 } Mat_SeqAIJMKL;
25 
26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
30 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
31 {
32   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
33   /* so we will ignore 'MatType type'. */
34   PetscErrorCode ierr;
35   Mat            B       = *newmat;
36   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
37 
38   PetscFunctionBegin;
39   if (reuse == MAT_INITIAL_MATRIX) {
40     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
41   }
42 
43   /* Reset the original function pointers. */
44   B->ops->duplicate        = MatDuplicate_SeqAIJ;
45   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
46   B->ops->destroy          = MatDestroy_SeqAIJ;
47   B->ops->mult             = MatMult_SeqAIJ;
48   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
49   B->ops->multadd          = MatMultAdd_SeqAIJ;
50   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
51 
52   /* Free everything in the Mat_SeqAIJMKL data structure.
53    * We don't free the Mat_SeqAIJMKL struct itself, as this will
54    * cause problems later when MatDestroy() tries to free it. */
55   /* Actually there is nothing to do here right now.
56    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
57    * see if there is some way to clean up the "handle" used by SpMV2. */
58 
59   /* Change the type of B to MATSEQAIJ. */
60   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
61 
62   *newmat = B;
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
68 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
69 {
70   PetscErrorCode ierr;
71   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
72 
73   PetscFunctionBegin;
74   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
75   mkl_sparse_destroy(aijmkl->csrA);
76   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
77 
78   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
79    * to destroy everything that remains. */
80   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
81   /* Note that I don't call MatSetType().  I believe this is because that
82    * is only to be called when *building* a matrix.  I could be wrong, but
83    * that is how things work for the SuperLU matrix class. */
84   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
90 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
91 {
92   PetscErrorCode ierr;
93   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
94   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
95 
96   PetscFunctionBegin;
97   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
98   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
99   PetscFunctionReturn(0);
100 }
101 
102 #undef __FUNCT__
103 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
104 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
105 {
106   PetscErrorCode  ierr;
107   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
108   Mat_SeqAIJMKL   *aijmkl;
109 
110   MatScalar       *aa;
111   PetscInt        n;
112   PetscInt        *aj,*ai;
113   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
114 
115   PetscFunctionBegin;
116   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
117 
118   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
119    * extra information and some different methods, call the AssemblyEnd
120    * routine for a MATSEQAIJ.
121    * I'm not sure if this is the best way to do this, but it avoids
122    * a lot of code duplication.
123    * I also note that currently MATSEQAIJMKL doesn't know anything about
124    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
125    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
126    * this, this may break things.  (Don't know... haven't looked at it.
127    * Do I need to disable this somehow?) */
128   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
129   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
130 
131 #ifdef USE_MKL_SPMV2
132   /* Now perform the SpMV2 setup and matrix optimization. */
133   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
134   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
135   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
136   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
137   n = A->rmap->n;
138   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
139   aa   = a->a;  /* Nonzero elements stored row-by-row. */
140   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
141   stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa);
142   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
143   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
144   stat = mkl_sparse_optimize(aijmkl->csrA);
145   if (stat != SPARSE_STATUS_SUCCESS) {
146     PetscFunctionReturn(PETSC_ERR_LIB);
147   }
148 #endif /* USE_MKL_SPMV2 */
149 
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "MatMult_SeqAIJMKL"
155 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
156 {
157   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
158   const PetscScalar *x;
159   PetscScalar       *y;
160   const MatScalar   *aa;
161   PetscErrorCode    ierr;
162   PetscInt          m=A->rmap->n;
163   const PetscInt    *aj,*ai;
164   PetscInt          i;
165 
166   /* Variables not in MatMult_SeqAIJ. */
167   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
168 
169   PetscFunctionBegin;
170   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
171   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
172   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
173   aa   = a->a;  /* Nonzero elements stored row-by-row. */
174   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
175 
176   /* Call MKL sparse BLAS routine to do the MatMult. */
177   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
178 
179   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
180   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
181   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2"
187 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
188 {
189   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
190   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
191   const PetscScalar *x;
192   PetscScalar       *y;
193   const MatScalar   *aa;
194   PetscErrorCode    ierr;
195   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
196 
197   PetscFunctionBegin;
198 
199 #ifdef DEBUG
200   printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n");
201 #endif
202   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
203   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
204 
205   /* Call MKL SpMV2 executor routine to do the MatMult. */
206   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
207 
208   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
209   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
211   if (stat != SPARSE_STATUS_SUCCESS) {
212     PetscFunctionReturn(PETSC_ERR_LIB);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
219 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
220 {
221   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
222   const PetscScalar *x;
223   PetscScalar       *y;
224   const MatScalar   *aa;
225   PetscErrorCode    ierr;
226   PetscInt          m=A->rmap->n;
227   const PetscInt    *aj,*ai;
228   PetscInt          i;
229 
230   /* Variables not in MatMultTranspose_SeqAIJ. */
231   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
235   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
236   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
237   aa   = a->a;  /* Nonzero elements stored row-by-row. */
238   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
239 
240   /* Call MKL sparse BLAS routine to do the MatMult. */
241   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
242 
243   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
244   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
245   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2"
251 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
252 {
253   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
254   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
255   const PetscScalar *x;
256   PetscScalar       *y;
257   const MatScalar   *aa;
258   PetscErrorCode    ierr;
259   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
260 
261   PetscFunctionBegin;
262 
263 #ifdef DEBUG
264   printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n");
265 #endif
266   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
267   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
268 
269   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
270   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
271 
272   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
273   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
274   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
275   if (stat != SPARSE_STATUS_SUCCESS) {
276     PetscFunctionReturn(PETSC_ERR_LIB);
277   }
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
283 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
284 {
285   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
286   const PetscScalar *x;
287   PetscScalar       *y,*z;
288   const MatScalar   *aa;
289   PetscErrorCode    ierr;
290   PetscInt          m=A->rmap->n;
291   const PetscInt    *aj,*ai;
292   PetscInt          i;
293 
294   /* Variables not in MatMultAdd_SeqAIJ. */
295   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
296   PetscScalar       alpha = 1.0;
297   PetscScalar       beta = 1.0;
298   char              matdescra[6];
299 
300   PetscFunctionBegin;
301   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
302   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
303 
304   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
305   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
306   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
307   aa   = a->a;  /* Nonzero elements stored row-by-row. */
308   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
309 
310   /* Call MKL sparse BLAS routine to do the MatMult. */
311   if (zz == yy) {
312     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
313     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
314   } else {
315     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
316      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
317     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
318     for (i=0; i<m; i++) {
319       z[i] += y[i];
320     }
321   }
322 
323   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
324   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
325   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2"
331 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
332 {
333   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
334   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
335   const PetscScalar *x;
336   PetscScalar       *y,*z;
337   const MatScalar   *aa;
338   PetscErrorCode    ierr;
339   PetscInt          m=A->rmap->n;
340   const PetscInt    *aj,*ai;
341   PetscInt          i;
342 
343   /* Variables not in MatMultAdd_SeqAIJ. */
344   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
345 
346   PetscFunctionBegin;
347 
348 #ifdef DEBUG
349   printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n");
350 #endif
351 
352   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
353   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
354   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
355   aa   = a->a;  /* Nonzero elements stored row-by-row. */
356   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
357 
358   /* Call MKL sparse BLAS routine to do the MatMult. */
359   if (zz == yy) {
360     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
361      * with alpha and beta both set to 1.0. */
362     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
363   } else {
364     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
365      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
366     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
367     for (i=0; i<m; i++) {
368       z[i] += y[i];
369     }
370   }
371 
372   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
373   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
374   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
375   if (stat != SPARSE_STATUS_SUCCESS) {
376     PetscFunctionReturn(PETSC_ERR_LIB);
377   }
378   PetscFunctionReturn(0);
379 }
380 
381 #undef __FUNCT__
382 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
383 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
384 {
385   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
386   const PetscScalar *x;
387   PetscScalar       *y,*z;
388   const MatScalar   *aa;
389   PetscErrorCode    ierr;
390   PetscInt          m=A->rmap->n;
391   const PetscInt    *aj,*ai;
392   PetscInt          i;
393 
394   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
395   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
396   PetscScalar       alpha = 1.0;
397   PetscScalar       beta = 1.0;
398   char              matdescra[6];
399 
400   PetscFunctionBegin;
401   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
402   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
403 
404   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
405   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
406   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
407   aa   = a->a;  /* Nonzero elements stored row-by-row. */
408   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
409 
410   /* Call MKL sparse BLAS routine to do the MatMult. */
411   if (zz == yy) {
412     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
413     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
414   } else {
415     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
416      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
417     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
418     for (i=0; i<m; i++) {
419       z[i] += y[i];
420     }
421   }
422 
423   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
424   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2"
431 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
432 {
433   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
434   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
435   const PetscScalar *x;
436   PetscScalar       *y,*z;
437   const MatScalar   *aa;
438   PetscErrorCode    ierr;
439   PetscInt          m=A->rmap->n;
440   const PetscInt    *aj,*ai;
441   PetscInt          i;
442 
443   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
444   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
445 
446   PetscFunctionBegin;
447 
448 #ifdef DEBUG
449   printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n");
450 #endif
451 
452   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
453   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
454   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
455   aa   = a->a;  /* Nonzero elements stored row-by-row. */
456   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
457 
458   /* Call MKL sparse BLAS routine to do the MatMult. */
459   if (zz == yy) {
460     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
461      * with alpha and beta both set to 1.0. */
462     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
463   } else {
464     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
465      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
466     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
467     for (i=0; i<m; i++) {
468       z[i] += y[i];
469     }
470   }
471 
472   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
473   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
474   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
475   if (stat != SPARSE_STATUS_SUCCESS) {
476     PetscFunctionReturn(PETSC_ERR_LIB);
477   }
478   PetscFunctionReturn(0);
479 }
480 
481 
482 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
483  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
484  * routine, but can also be used to convert an assembled SeqAIJ matrix
485  * into a SeqAIJMKL one. */
486 #undef __FUNCT__
487 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
488 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
489 {
490   PetscErrorCode ierr;
491   Mat            B = *newmat;
492   Mat_SeqAIJMKL *aijmkl;
493 
494   PetscFunctionBegin;
495   if (reuse == MAT_INITIAL_MATRIX) {
496     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
497   }
498 
499   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
500   B->spptr = (void*) aijmkl;
501 
502   /* Set function pointers for methods that we inherit from AIJ but override.
503    * Currently the transposed operations are not being set because I encounter memory corruption
504    * when these are enabled.  Need to look at this with Valgrind or similar. --RTM */
505   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
506   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
507   B->ops->destroy          = MatDestroy_SeqAIJMKL;
508 #ifdef USE_MKL_SPMV2
509   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
510   /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2; */
511   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
512   /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */
513 #else
514   B->ops->mult             = MatMult_SeqAIJMKL;
515 //  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
516   B->ops->multadd          = MatMultAdd_SeqAIJMKL;
517 //  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
518 #endif /* USE_MKL_SPMV2 */
519 
520   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
521 
522   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
523   *newmat = B;
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatCreateSeqAIJMKL"
529 /*@C
530    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
531    This type inherits from AIJ and is largely identical, but uses sparse BLAS
532    routines from Intel MKL whenever possible.
533    Collective on MPI_Comm
534 
535    Input Parameters:
536 +  comm - MPI communicator, set to PETSC_COMM_SELF
537 .  m - number of rows
538 .  n - number of columns
539 .  nz - number of nonzeros per row (same for all rows)
540 -  nnz - array containing the number of nonzeros in the various rows
541          (possibly different for each row) or NULL
542 
543    Output Parameter:
544 .  A - the matrix
545 
546    Notes:
547    If nnz is given then nz is ignored
548 
549    Level: intermediate
550 
551 .keywords: matrix, cray, sparse, parallel
552 
553 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
554 @*/
555 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
556 {
557   PetscErrorCode ierr;
558 
559   PetscFunctionBegin;
560   ierr = MatCreate(comm,A);CHKERRQ(ierr);
561   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
562   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
563   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNCT__
568 #define __FUNCT__ "MatCreate_SeqAIJMKL"
569 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
570 {
571   PetscErrorCode ierr;
572 
573   PetscFunctionBegin;
574   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
575   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578