xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision a9041576a417233ac9c4cfe237d2866307a590c7)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 } Mat_SeqAIJMKL;
21 
22 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
27 {
28   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
29   /* so we will ignore 'MatType type'. */
30   PetscErrorCode ierr;
31   Mat            B       = *newmat;
32   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
33 
34   PetscFunctionBegin;
35   if (reuse == MAT_INITIAL_MATRIX) {
36     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
37   }
38 
39   /* Reset the original function pointers. */
40   B->ops->duplicate        = MatDuplicate_SeqAIJ;
41   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
42   B->ops->destroy          = MatDestroy_SeqAIJ;
43   B->ops->mult             = MatMult_SeqAIJ;
44   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
45   B->ops->multadd          = MatMultAdd_SeqAIJ;
46   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
47 
48   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
49    * simply involves destroying the MKL sparse matrix handle.
50    * We don't free the Mat_SeqAIJMKL struct itself, as this will
51    * cause problems later when MatDestroy() tries to free it. */
52 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
53   if (aijmkl->sparse_optimized) {
54     sparse_status_t stat = SPARSE_STATUS_SUCCESS;
55     stat = mkl_sparse_destroy(aijmkl->csrA);
56     if (stat != SPARSE_STATUS_SUCCESS) {
57       PetscFunctionReturn(PETSC_ERR_LIB);
58     }
59   }
60 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
61 
62   /* Change the type of B to MATSEQAIJ. */
63   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
64 
65   *newmat = B;
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
71 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
72 {
73   PetscErrorCode ierr;
74   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
75 
76   PetscFunctionBegin;
77   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
78 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
79   if (aijmkl->sparse_optimized) {
80     sparse_status_t stat = SPARSE_STATUS_SUCCESS;
81     stat = mkl_sparse_destroy(aijmkl->csrA);
82     if (stat != SPARSE_STATUS_SUCCESS) {
83       PetscFunctionReturn(PETSC_ERR_LIB);
84     }
85   }
86 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
87   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
88 
89   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
90    * to destroy everything that remains. */
91   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
92   /* Note that I don't call MatSetType().  I believe this is because that
93    * is only to be called when *building* a matrix.  I could be wrong, but
94    * that is how things work for the SuperLU matrix class. */
95   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
101 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
102 {
103   PetscErrorCode ierr;
104   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
105   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
106 
107   PetscFunctionBegin;
108   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
109   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
115 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
116 {
117   PetscErrorCode  ierr;
118   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
119   Mat_SeqAIJMKL   *aijmkl;
120 
121   MatScalar       *aa;
122   PetscInt        n;
123   PetscInt        *aj,*ai;
124 
125   PetscFunctionBegin;
126   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
127 
128   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
129    * extra information and some different methods, call the AssemblyEnd
130    * routine for a MATSEQAIJ.
131    * I'm not sure if this is the best way to do this, but it avoids
132    * a lot of code duplication.
133    * I also note that currently MATSEQAIJMKL doesn't know anything about
134    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
135    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
136    * this, this may break things.  (Don't know... haven't looked at it.
137    * Do I need to disable this somehow?) */
138   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
139   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
140 
141   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
142 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
143   if (!aijmkl->no_SpMV2) {
144     sparse_status_t stat = SPARSE_STATUS_SUCCESS;
145     /* Now perform the SpMV2 setup and matrix optimization. */
146     aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
147     aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
148     aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
149     n = A->rmap->n;
150     aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
151     aa   = a->a;  /* Nonzero elements stored row-by-row. */
152     ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
153     stat = mkl_sparse_x_create_csr (&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,n,n,ai,ai+1,aj,aa);
154     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
155     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
156     stat = mkl_sparse_optimize(aijmkl->csrA);
157     if (stat != SPARSE_STATUS_SUCCESS) {
158       PetscFunctionReturn(PETSC_ERR_LIB);
159     }
160     aijmkl->sparse_optimized = PETSC_TRUE;
161   }
162 #endif
163 
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatMult_SeqAIJMKL"
169 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
170 {
171   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
172   const PetscScalar *x;
173   PetscScalar       *y;
174   const MatScalar   *aa;
175   PetscErrorCode    ierr;
176   PetscInt          m=A->rmap->n;
177   const PetscInt    *aj,*ai;
178 
179   /* Variables not in MatMult_SeqAIJ. */
180   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
181 
182   PetscFunctionBegin;
183   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
184   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
185   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
186   aa   = a->a;  /* Nonzero elements stored row-by-row. */
187   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
188 
189   /* Call MKL sparse BLAS routine to do the MatMult. */
190   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
191 
192   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
193   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
194   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
195   PetscFunctionReturn(0);
196 }
197 
198 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
199 #undef __FUNCT__
200 #define __FUNCT__ "MatMult_SeqAIJMKL_SpMV2"
201 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
202 {
203   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
204   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
205   const PetscScalar *x;
206   PetscScalar       *y;
207   PetscErrorCode    ierr;
208   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
209 
210   PetscFunctionBegin;
211 
212 #ifdef DEBUG
213   printf("DEBUG: In MatMult_SeqAIJMKL_SpMV2\n");
214 #endif
215   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
216   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
217 
218   /* Call MKL SpMV2 executor routine to do the MatMult. */
219   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
220 
221   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
222   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
223   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
224   if (stat != SPARSE_STATUS_SUCCESS) {
225     PetscFunctionReturn(PETSC_ERR_LIB);
226   }
227   PetscFunctionReturn(0);
228 }
229 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
230 
231 #undef __FUNCT__
232 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
233 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
234 {
235   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
236   const PetscScalar *x;
237   PetscScalar       *y;
238   const MatScalar   *aa;
239   PetscErrorCode    ierr;
240   PetscInt          m=A->rmap->n;
241   const PetscInt    *aj,*ai;
242 
243   /* Variables not in MatMultTranspose_SeqAIJ. */
244   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
245 
246   PetscFunctionBegin;
247   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
248   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
249   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
250   aa   = a->a;  /* Nonzero elements stored row-by-row. */
251   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
252 
253   /* Call MKL sparse BLAS routine to do the MatMult. */
254   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
255 
256   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
257   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
258   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 
262 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
263 #undef __FUNCT__
264 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL_SpMV2"
265 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
266 {
267   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
268   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
269   const PetscScalar *x;
270   PetscScalar       *y;
271   PetscErrorCode    ierr;
272   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
273 
274   PetscFunctionBegin;
275 
276 #ifdef DEBUG
277   printf("DEBUG: In MatMultTranspose_SeqAIJMKL_SpMV2\n");
278 #endif
279   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
280   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
281 
282   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
283   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
284 
285   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
286   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
287   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
288   if (stat != SPARSE_STATUS_SUCCESS) {
289     PetscFunctionReturn(PETSC_ERR_LIB);
290   }
291   PetscFunctionReturn(0);
292 }
293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
297 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
298 {
299   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
300   const PetscScalar *x;
301   PetscScalar       *y,*z;
302   const MatScalar   *aa;
303   PetscErrorCode    ierr;
304   PetscInt          m=A->rmap->n;
305   const PetscInt    *aj,*ai;
306   PetscInt          i;
307 
308   /* Variables not in MatMultAdd_SeqAIJ. */
309   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
310   PetscScalar       alpha = 1.0;
311   PetscScalar       beta = 1.0;
312   char              matdescra[6];
313 
314   PetscFunctionBegin;
315   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
316   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
317 
318   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
319   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
320   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
321   aa   = a->a;  /* Nonzero elements stored row-by-row. */
322   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
323 
324   /* Call MKL sparse BLAS routine to do the MatMult. */
325   if (zz == yy) {
326     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
327     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
328   } else {
329     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
330      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
331     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
332     for (i=0; i<m; i++) {
333       z[i] += y[i];
334     }
335   }
336 
337   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
338   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
344 #undef __FUNCT__
345 #define __FUNCT__ "MatMultAdd_SeqAIJMKL_SpMV2"
346 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
347 {
348   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
349   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
350   const PetscScalar *x;
351   PetscScalar       *y,*z;
352   PetscErrorCode    ierr;
353   PetscInt          m=A->rmap->n;
354   PetscInt          i;
355 
356   /* Variables not in MatMultAdd_SeqAIJ. */
357   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
358 
359   PetscFunctionBegin;
360 
361 #ifdef DEBUG
362   printf("DEBUG: In MatMultAdd_SeqAIJMKL_SpMV2\n");
363 #endif
364 
365   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
366   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
367 
368   /* Call MKL sparse BLAS routine to do the MatMult. */
369   if (zz == yy) {
370     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
371      * with alpha and beta both set to 1.0. */
372     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
373   } else {
374     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
375      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
376     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
377     for (i=0; i<m; i++) {
378       z[i] += y[i];
379     }
380   }
381 
382   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
383   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
384   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
385   if (stat != SPARSE_STATUS_SUCCESS) {
386     PetscFunctionReturn(PETSC_ERR_LIB);
387   }
388   PetscFunctionReturn(0);
389 }
390 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
394 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
395 {
396   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
397   const PetscScalar *x;
398   PetscScalar       *y,*z;
399   const MatScalar   *aa;
400   PetscErrorCode    ierr;
401   PetscInt          m=A->rmap->n;
402   const PetscInt    *aj,*ai;
403   PetscInt          i;
404 
405   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
406   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
407   PetscScalar       alpha = 1.0;
408   PetscScalar       beta = 1.0;
409   char              matdescra[6];
410 
411   PetscFunctionBegin;
412   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
413   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
414 
415   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
416   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
417   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
418   aa   = a->a;  /* Nonzero elements stored row-by-row. */
419   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
420 
421   /* Call MKL sparse BLAS routine to do the MatMult. */
422   if (zz == yy) {
423     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
424     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
425   } else {
426     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
427      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
428     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
429     for (i=0; i<m; i++) {
430       z[i] += y[i];
431     }
432   }
433 
434   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
435   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
436   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
441 #undef __FUNCT__
442 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL_SpMV2"
443 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
444 {
445   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
446   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
447   const PetscScalar *x;
448   PetscScalar       *y,*z;
449   PetscErrorCode    ierr;
450   PetscInt          m=A->rmap->n;
451   PetscInt          i;
452 
453   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
454   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
455 
456   PetscFunctionBegin;
457 
458 #ifdef DEBUG
459   printf("DEBUG: In MatMultTransposeAdd_SeqAIJMKL_SpMV2\n");
460 #endif
461 
462   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
463   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
464 
465   /* Call MKL sparse BLAS routine to do the MatMult. */
466   if (zz == yy) {
467     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
468      * with alpha and beta both set to 1.0. */
469     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,y);
470   } else {
471     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
472      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
473     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
474     for (i=0; i<m; i++) {
475       z[i] += y[i];
476     }
477   }
478 
479   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
480   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
481   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
482   if (stat != SPARSE_STATUS_SUCCESS) {
483     PetscFunctionReturn(PETSC_ERR_LIB);
484   }
485   PetscFunctionReturn(0);
486 }
487 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
488 
489 
490 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
491  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
492  * routine, but can also be used to convert an assembled SeqAIJ matrix
493  * into a SeqAIJMKL one. */
494 #undef __FUNCT__
495 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
496 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
497 {
498   PetscErrorCode ierr;
499   Mat            B = *newmat;
500   Mat_SeqAIJMKL *aijmkl;
501   PetscBool       set;
502 
503   PetscFunctionBegin;
504   if (reuse == MAT_INITIAL_MATRIX) {
505     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
506   }
507 
508   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
509   B->spptr = (void*) aijmkl;
510 
511   /* Set function pointers for methods that we inherit from AIJ but override.
512    * Currently the transposed operations are not being set because I encounter memory corruption
513    * when these are enabled.  Need to look at this with Valgrind or similar. --RTM */
514   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
515   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
516   B->ops->destroy          = MatDestroy_SeqAIJMKL;
517 
518   aijmkl->sparse_optimized = PETSC_FALSE;
519 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
520   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
521 #elif
522   aijmkl->no_SpMV2 = PETSC_TRUE;
523 #endif
524 
525   /* Parse command line options. */
526   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
527   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
528   ierr = PetscOptionsEnd();CHKERRQ(ierr);
529 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
530   if(!aijmkl->no_SpMV2) {
531     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
532     aijmkl->no_SpMV2 = PETSC_TRUE;
533   }
534 #endif
535 
536   if(!aijmkl->no_SpMV2) {
537 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
538     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
539     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2; */
540     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
541     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; */
542 #endif
543   } else {
544     B->ops->mult             = MatMult_SeqAIJMKL;
545     /* B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL; */
546     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
547     /* B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; */
548   }
549 
550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
551 
552   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
553   *newmat = B;
554   PetscFunctionReturn(0);
555 }
556 
557 #undef __FUNCT__
558 #define __FUNCT__ "MatCreateSeqAIJMKL"
559 /*@C
560    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
561    This type inherits from AIJ and is largely identical, but uses sparse BLAS
562    routines from Intel MKL whenever possible.
563    Collective on MPI_Comm
564 
565    Input Parameters:
566 +  comm - MPI communicator, set to PETSC_COMM_SELF
567 .  m - number of rows
568 .  n - number of columns
569 .  nz - number of nonzeros per row (same for all rows)
570 -  nnz - array containing the number of nonzeros in the various rows
571          (possibly different for each row) or NULL
572 
573    Output Parameter:
574 .  A - the matrix
575 
576    Notes:
577    If nnz is given then nz is ignored
578 
579    Level: intermediate
580 
581 .keywords: matrix, cray, sparse, parallel
582 
583 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
584 @*/
585 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
586 {
587   PetscErrorCode ierr;
588 
589   PetscFunctionBegin;
590   ierr = MatCreate(comm,A);CHKERRQ(ierr);
591   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
592   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
593   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatCreate_SeqAIJMKL"
599 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
600 {
601   PetscErrorCode ierr;
602 
603   PetscFunctionBegin;
604   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
605   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608