xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 8d3fe1b09a6310949bc4b978135e22224a1350cd)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
19   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
20   struct matrix_descr descr;
21 #endif
22 } Mat_SeqAIJMKL;
23 
24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
25 
26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
27 {
28   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
29   /* so we will ignore 'MatType type'. */
30   PetscErrorCode ierr;
31   Mat            B       = *newmat;
32   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
33 
34   PetscFunctionBegin;
35   if (reuse == MAT_INITIAL_MATRIX) {
36     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
37     aijmkl = (Mat_SeqAIJMKL*)B->spptr;
38   }
39 
40   /* Reset the original function pointers. */
41   B->ops->duplicate        = MatDuplicate_SeqAIJ;
42   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
43   B->ops->destroy          = MatDestroy_SeqAIJ;
44   B->ops->mult             = MatMult_SeqAIJ;
45   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
46   B->ops->multadd          = MatMultAdd_SeqAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
48 
49   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
50   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
51   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
52   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
53 
54   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
55    * simply involves destroying the MKL sparse matrix handle and then freeing
56    * the spptr pointer. */
57 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
58   if (aijmkl->sparse_optimized) {
59     sparse_status_t stat;
60     stat = mkl_sparse_destroy(aijmkl->csrA);
61     if (stat != SPARSE_STATUS_SUCCESS) {
62       PetscFunctionReturn(PETSC_ERR_LIB);
63     }
64   }
65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
66   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
67 
68   /* Change the type of B to MATSEQAIJ. */
69   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
70 
71   *newmat = B;
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
76 {
77   PetscErrorCode ierr;
78   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
79 
80   PetscFunctionBegin;
81 
82   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
83    * spptr pointer. */
84   if (aijmkl) {
85     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
86 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
87     if (aijmkl->sparse_optimized) {
88       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
89       stat = mkl_sparse_destroy(aijmkl->csrA);
90       if (stat != SPARSE_STATUS_SUCCESS) {
91         PetscFunctionReturn(PETSC_ERR_LIB);
92       }
93     }
94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
95     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
96   }
97 
98   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
99    * to destroy everything that remains. */
100   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
101   /* Note that I don't call MatSetType().  I believe this is because that
102    * is only to be called when *building* a matrix.  I could be wrong, but
103    * that is how things work for the SuperLU matrix class. */
104   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
109 {
110   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
111   Mat_SeqAIJMKL   *aijmkl;
112   PetscInt        m,n;
113   MatScalar       *aa;
114   PetscInt        *aj,*ai;
115 
116 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
117   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
118    * does nothing. We make it callable anyway in this case because it cuts
119    * down on littering the code with #ifdefs. */
120   PetscFunctionReturn(0);
121 #else
122 
123   sparse_status_t stat;
124 
125   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
126 
127   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
128 
129   if (aijmkl->sparse_optimized) {
130     /* Matrix has been previously assembled and optimized. Must destroy old
131      * matrix handle before running the optimization step again. */
132     stat = mkl_sparse_destroy(aijmkl->csrA);
133     if (stat != SPARSE_STATUS_SUCCESS) {
134       PetscFunctionReturn(PETSC_ERR_LIB);
135     }
136   }
137   aijmkl->sparse_optimized = PETSC_FALSE;
138 
139   /* Now perform the SpMV2 setup and matrix optimization. */
140   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
141   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
142   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
143   m = A->rmap->n;
144   n = A->cmap->n;
145   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
146   aa   = a->a;  /* Nonzero elements stored row-by-row. */
147   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
148   if (a->nz) {
149     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
150      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
151     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
152     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
153     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
154     stat = mkl_sparse_optimize(aijmkl->csrA);
155     if (stat != SPARSE_STATUS_SUCCESS) {
156       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
157       PetscFunctionReturn(PETSC_ERR_LIB);
158     }
159     aijmkl->sparse_optimized = PETSC_TRUE;
160   }
161 
162   PetscFunctionReturn(0);
163 #endif
164 }
165 
166 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
167 {
168   PetscErrorCode ierr;
169   Mat_SeqAIJMKL *aijmkl;
170   Mat_SeqAIJMKL *aijmkl_dest;
171 
172   PetscFunctionBegin;
173   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
174   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
175   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
176   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
177   aijmkl_dest->sparse_optimized = PETSC_FALSE;
178   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
183 {
184   PetscErrorCode  ierr;
185   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
186 
187   PetscFunctionBegin;
188   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
189 
190   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
191    * extra information and some different methods, call the AssemblyEnd
192    * routine for a MATSEQAIJ.
193    * I'm not sure if this is the best way to do this, but it avoids
194    * a lot of code duplication.
195    * I also note that currently MATSEQAIJMKL doesn't know anything about
196    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
197    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
198    * this, this may break things.  (Don't know... haven't looked at it.
199    * Do I need to disable this somehow?) */
200   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
201   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
202 
203   /* Now create the MKL sparse handle (if needed; the function checks). */
204   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
205 
206   PetscFunctionReturn(0);
207 }
208 
209 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
210 {
211   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
212   const PetscScalar *x;
213   PetscScalar       *y;
214   const MatScalar   *aa;
215   PetscErrorCode    ierr;
216   PetscInt          m=A->rmap->n;
217   PetscInt          n=A->cmap->n;
218   PetscScalar       alpha = 1.0;
219   PetscScalar       beta = 0.0;
220   const PetscInt    *aj,*ai;
221   char              matdescra[6];
222 
223 
224   /* Variables not in MatMult_SeqAIJ. */
225   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
226 
227   PetscFunctionBegin;
228   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
229   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
230   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
231   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
232   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
233   aa   = a->a;  /* Nonzero elements stored row-by-row. */
234   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
235 
236   /* Call MKL sparse BLAS routine to do the MatMult. */
237   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
238 
239   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
240   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
241   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
246 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
247 {
248   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
249   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
250   const PetscScalar *x;
251   PetscScalar       *y;
252   PetscErrorCode    ierr;
253   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
254 
255   PetscFunctionBegin;
256 
257   /* If there are no nonzero entries, this is a no-op: return immediately. */
258   if(!a->nz) PetscFunctionReturn(0);
259 
260   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
261   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
262 
263   /* Call MKL SpMV2 executor routine to do the MatMult. */
264   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
265 
266   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
267   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
268   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
269   if (stat != SPARSE_STATUS_SUCCESS) {
270     PetscFunctionReturn(PETSC_ERR_LIB);
271   }
272   PetscFunctionReturn(0);
273 }
274 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
275 
276 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
277 {
278   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
279   const PetscScalar *x;
280   PetscScalar       *y;
281   const MatScalar   *aa;
282   PetscErrorCode    ierr;
283   PetscInt          m=A->rmap->n;
284   PetscInt          n=A->cmap->n;
285   PetscScalar       alpha = 1.0;
286   PetscScalar       beta = 0.0;
287   const PetscInt    *aj,*ai;
288   char              matdescra[6];
289 
290   /* Variables not in MatMultTranspose_SeqAIJ. */
291   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
292 
293   PetscFunctionBegin;
294   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
295   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
296   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
297   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
298   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
299   aa   = a->a;  /* Nonzero elements stored row-by-row. */
300   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
301 
302   /* Call MKL sparse BLAS routine to do the MatMult. */
303   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
304 
305   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
306   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
307   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
308   PetscFunctionReturn(0);
309 }
310 
311 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
312 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
313 {
314   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
315   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
316   const PetscScalar *x;
317   PetscScalar       *y;
318   PetscErrorCode    ierr;
319   sparse_status_t   stat;
320 
321   PetscFunctionBegin;
322 
323   /* If there are no nonzero entries, this is a no-op: return immediately. */
324   if(!a->nz) PetscFunctionReturn(0);
325 
326   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
327   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
328 
329   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
330   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
331 
332   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
333   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
334   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
335   if (stat != SPARSE_STATUS_SUCCESS) {
336     PetscFunctionReturn(PETSC_ERR_LIB);
337   }
338   PetscFunctionReturn(0);
339 }
340 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
341 
342 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
343 {
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
345   const PetscScalar *x;
346   PetscScalar       *y,*z;
347   const MatScalar   *aa;
348   PetscErrorCode    ierr;
349   PetscInt          m=A->rmap->n;
350   PetscInt          n=A->cmap->n;
351   const PetscInt    *aj,*ai;
352   PetscInt          i;
353 
354   /* Variables not in MatMultAdd_SeqAIJ. */
355   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
356   PetscScalar       alpha = 1.0;
357   PetscScalar       beta;
358   char              matdescra[6];
359 
360   PetscFunctionBegin;
361   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
362   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
363 
364   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
365   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
366   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
367   aa   = a->a;  /* Nonzero elements stored row-by-row. */
368   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
369 
370   /* Call MKL sparse BLAS routine to do the MatMult. */
371   if (zz == yy) {
372     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
373     beta = 1.0;
374     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
375   } else {
376     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
377      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
378     beta = 0.0;
379     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
380     for (i=0; i<m; i++) {
381       z[i] += y[i];
382     }
383   }
384 
385   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
386   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
387   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
392 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
393 {
394   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
395   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
396   const PetscScalar *x;
397   PetscScalar       *y,*z;
398   PetscErrorCode    ierr;
399   PetscInt          m=A->rmap->n;
400   PetscInt          i;
401 
402   /* Variables not in MatMultAdd_SeqAIJ. */
403   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
404 
405   PetscFunctionBegin;
406 
407   /* If there are no nonzero entries, this is a no-op: return immediately. */
408   if(!a->nz) PetscFunctionReturn(0);
409 
410   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
411   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
412 
413   /* Call MKL sparse BLAS routine to do the MatMult. */
414   if (zz == yy) {
415     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
416      * with alpha and beta both set to 1.0. */
417     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
418   } else {
419     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
420      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
421     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
422     for (i=0; i<m; i++) {
423       z[i] += y[i];
424     }
425   }
426 
427   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
428   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
430   if (stat != SPARSE_STATUS_SUCCESS) {
431     PetscFunctionReturn(PETSC_ERR_LIB);
432   }
433   PetscFunctionReturn(0);
434 }
435 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
436 
437 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
438 {
439   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
440   const PetscScalar *x;
441   PetscScalar       *y,*z;
442   const MatScalar   *aa;
443   PetscErrorCode    ierr;
444   PetscInt          m=A->rmap->n;
445   PetscInt          n=A->cmap->n;
446   const PetscInt    *aj,*ai;
447   PetscInt          i;
448 
449   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
450   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
451   PetscScalar       alpha = 1.0;
452   PetscScalar       beta;
453   char              matdescra[6];
454 
455   PetscFunctionBegin;
456   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
457   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
458 
459   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
460   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
461   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
462   aa   = a->a;  /* Nonzero elements stored row-by-row. */
463   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
464 
465   /* Call MKL sparse BLAS routine to do the MatMult. */
466   if (zz == yy) {
467     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
468     beta = 1.0;
469     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
470   } else {
471     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
472      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
473     beta = 0.0;
474     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
475     for (i=0; i<n; i++) {
476       z[i] += y[i];
477     }
478   }
479 
480   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
481   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
482   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
487 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
488 {
489   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
490   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
491   const PetscScalar *x;
492   PetscScalar       *y,*z;
493   PetscErrorCode    ierr;
494   PetscInt          n=A->cmap->n;
495   PetscInt          i;
496 
497   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
498   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
499 
500   PetscFunctionBegin;
501 
502   /* If there are no nonzero entries, this is a no-op: return immediately. */
503   if(!a->nz) PetscFunctionReturn(0);
504 
505   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
506   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
507 
508   /* Call MKL sparse BLAS routine to do the MatMult. */
509   if (zz == yy) {
510     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
511      * with alpha and beta both set to 1.0. */
512     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
513   } else {
514     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
515      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
516     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
517     for (i=0; i<n; i++) {
518       z[i] += y[i];
519     }
520   }
521 
522   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
523   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
524   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
525   if (stat != SPARSE_STATUS_SUCCESS) {
526     PetscFunctionReturn(PETSC_ERR_LIB);
527   }
528   PetscFunctionReturn(0);
529 }
530 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
531 
532 PETSC_INTERN PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
533 {
534   PetscErrorCode ierr;
535 
536   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
537   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
541 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
542  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
543  * routine, but can also be used to convert an assembled SeqAIJ matrix
544  * into a SeqAIJMKL one. */
545 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
546 {
547   PetscErrorCode ierr;
548   Mat            B = *newmat;
549   Mat_SeqAIJMKL  *aijmkl;
550   PetscBool      set;
551   PetscBool      sametype;
552 
553   PetscFunctionBegin;
554   if (reuse == MAT_INITIAL_MATRIX) {
555     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
556   }
557 
558   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
559   if (sametype) PetscFunctionReturn(0);
560 
561   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
562   B->spptr = (void*) aijmkl;
563 
564   /* Set function pointers for methods that we inherit from AIJ but override.
565    * We also parse some command line options below, since those determine some of the methods we point to. */
566   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
567   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
568   B->ops->destroy          = MatDestroy_SeqAIJMKL;
569 
570   aijmkl->sparse_optimized = PETSC_FALSE;
571 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
572   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
573 #elif
574   aijmkl->no_SpMV2 = PETSC_TRUE;
575 #endif
576 
577   /* Parse command line options. */
578   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
579   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
580   ierr = PetscOptionsEnd();CHKERRQ(ierr);
581 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
582   if(!aijmkl->no_SpMV2) {
583     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
584     aijmkl->no_SpMV2 = PETSC_TRUE;
585   }
586 #endif
587 
588   if(!aijmkl->no_SpMV2) {
589 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
590     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
591     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
592     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
593     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
594 #endif
595   } else {
596     B->ops->mult             = MatMult_SeqAIJMKL;
597     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
598     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
599     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
600   }
601 
602   B->ops->scale = MatScale_SeqAIJMKL;
603 
604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
609 
610   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
611   *newmat = B;
612   PetscFunctionReturn(0);
613 }
614 
615 /*@C
616    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
617    This type inherits from AIJ and is largely identical, but uses sparse BLAS
618    routines from Intel MKL whenever possible.
619    Collective on MPI_Comm
620 
621    Input Parameters:
622 +  comm - MPI communicator, set to PETSC_COMM_SELF
623 .  m - number of rows
624 .  n - number of columns
625 .  nz - number of nonzeros per row (same for all rows)
626 -  nnz - array containing the number of nonzeros in the various rows
627          (possibly different for each row) or NULL
628 
629    Output Parameter:
630 .  A - the matrix
631 
632    Notes:
633    If nnz is given then nz is ignored
634 
635    Level: intermediate
636 
637 .keywords: matrix, cray, sparse, parallel
638 
639 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
640 @*/
641 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
642 {
643   PetscErrorCode ierr;
644 
645   PetscFunctionBegin;
646   ierr = MatCreate(comm,A);CHKERRQ(ierr);
647   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
648   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
649   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
654 {
655   PetscErrorCode ierr;
656 
657   PetscFunctionBegin;
658   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
659   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
660   PetscFunctionReturn(0);
661 }
662