xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 7c0b37d00eeca107eaf113dfd23b3efcc7a4c91c)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
/* Private per-matrix data for MATSEQAIJMKL, hung off Mat->spptr.
 * Augments the base MATSEQAIJ storage with MKL-specific options and the
 * (optional) inspector-executor sparse matrix handle. */
typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;            /* Object state of the Mat when the MKL handle was last created/updated; used to detect stale handles. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;  /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Matrix descriptor (type/fill mode/diag) passed to the SpMV2 executor routines. */
#endif
} Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
53   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
55 
56   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr);
61 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
62   if(!aijmkl->no_SpMV2) {
63     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
64 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
65     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
66 #endif
67     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
68   }
69 
70   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
71    * simply involves destroying the MKL sparse matrix handle and then freeing
72    * the spptr pointer. */
73   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
74 
75   if (aijmkl->sparse_optimized) {
76     sparse_status_t stat;
77     stat = mkl_sparse_destroy(aijmkl->csrA);
78     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
79   }
80 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
81   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
82 
83   /* Change the type of B to MATSEQAIJ. */
84   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
85 
86   *newmat = B;
87   PetscFunctionReturn(0);
88 }
89 
/* Destructor for MATSEQAIJMKL: releases the MKL sparse handle and the
 * Mat_SeqAIJMKL data (if present), then reverts the type name to MATSEQAIJ and
 * delegates the remaining cleanup to MatDestroy_SeqAIJ(). */
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
120 
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
128 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
129 {
130 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
131   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
132    * does nothing. We make it callable anyway in this case because it cuts
133    * down on littering the code with #ifdefs. */
134   PetscFunctionBegin;
135   PetscFunctionReturn(0);
136 #else
137   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
138   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
139   PetscInt         m,n;
140   MatScalar        *aa;
141   PetscInt         *aj,*ai;
142   sparse_status_t  stat;
143   PetscErrorCode   ierr;
144 
145   PetscFunctionBegin;
146 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
147   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
148    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
149    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
150    * mkl_sparse_optimize() later. */
151   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
152 #endif
153 
154   if (aijmkl->sparse_optimized) {
155     /* Matrix has been previously assembled and optimized. Must destroy old
156      * matrix handle before running the optimization step again. */
157     stat = mkl_sparse_destroy(aijmkl->csrA);
158     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
159   }
160   aijmkl->sparse_optimized = PETSC_FALSE;
161 
162   /* Now perform the SpMV2 setup and matrix optimization. */
163   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
164   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
165   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
166   m = A->rmap->n;
167   n = A->cmap->n;
168   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
169   aa   = a->a;  /* Nonzero elements stored row-by-row. */
170   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
171   if ((a->nz!=0) & !(A->structure_only)) {
172     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
173      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
174     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
175     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
176     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
177     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
178     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
179     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
180     if (!aijmkl->no_SpMV2) {
181       stat = mkl_sparse_optimize(aijmkl->csrA);
182       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
183     }
184     aijmkl->sparse_optimized = PETSC_TRUE;
185     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
186   }
187 
188   PetscFunctionReturn(0);
189 #endif
190 }
191 
192 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
193  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
194  * matrix handle.
195  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
196  * there is no good alternative. */
197 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
199 {
200   PetscErrorCode      ierr;
201   sparse_status_t     stat;
202   sparse_index_base_t indexing;
203   PetscInt            nrows, ncols;
204   PetscInt            *aj,*ai,*dummy;
205   MatScalar           *aa;
206   Mat                 A;
207   Mat_SeqAIJMKL       *aijmkl;
208 
209   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
210   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
211   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
212 
213   if (reuse == MAT_REUSE_MATRIX) {
214     ierr = MatDestroy(mat);CHKERRQ(ierr);
215   }
216   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
217   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
218   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
219   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
220    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
221    * they will be destroyed when the MKL handle is destroyed.
222    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
223   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
224 
225   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
226    * Now turn it into a MATSEQAIJMKL. */
227   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
228 
229   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
230   aijmkl->csrA = csrA;
231 
232   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
233    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
234    * and just need to be able to run the MKL optimization step. */
235   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
236   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
237   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
238   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
239   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
240   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
241   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
242   if (!aijmkl->no_SpMV2) {
243     stat = mkl_sparse_optimize(aijmkl->csrA);
244     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
245   }
246   aijmkl->sparse_optimized = PETSC_TRUE;
247   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
248 
249   *mat = A;
250   PetscFunctionReturn(0);
251 }
252 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
253 
254 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
255  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
256  * MatMatMultNumeric(). */
257 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
258 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
259 {
260   PetscInt            i;
261   PetscInt            nrows,ncols;
262   PetscInt            nz;
263   PetscInt            *ai,*aj,*dummy;
264   PetscScalar         *aa;
265   PetscErrorCode      ierr;
266   Mat_SeqAIJMKL       *aijmkl;
267   sparse_status_t     stat;
268   sparse_index_base_t indexing;
269 
270   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
271 
272   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
273   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
274   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
275 
276   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
277    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
278   for (i=0; i<nrows; i++) {
279     nz = ai[i+1] - ai[i];
280     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
281   }
282 
283   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
285 
286   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
287   /* We mark our matrix as having a valid, optimized MKL handle.
288    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
289   aijmkl->sparse_optimized = PETSC_TRUE;
290 
291   PetscFunctionReturn(0);
292 }
293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
294 
295 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
296 {
297   PetscErrorCode ierr;
298   Mat_SeqAIJMKL  *aijmkl;
299   Mat_SeqAIJMKL  *aijmkl_dest;
300 
301   PetscFunctionBegin;
302   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
303   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
304   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
305   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
306   aijmkl_dest->sparse_optimized = PETSC_FALSE;
307   if (aijmkl->eager_inspection) {
308     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
314 {
315   PetscErrorCode  ierr;
316   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
317   Mat_SeqAIJMKL   *aijmkl;
318 
319   PetscFunctionBegin;
320   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
321 
322   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
323    * extra information and some different methods, call the AssemblyEnd
324    * routine for a MATSEQAIJ.
325    * I'm not sure if this is the best way to do this, but it avoids
326    * a lot of code duplication. */
327   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
328   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
329 
330   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
331    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
332   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
333   if (aijmkl->eager_inspection) {
334     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
335   }
336 
337   PetscFunctionReturn(0);
338 }
339 
340 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
341 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
342 {
343   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
344   const PetscScalar *x;
345   PetscScalar       *y;
346   const MatScalar   *aa;
347   PetscErrorCode    ierr;
348   PetscInt          m=A->rmap->n;
349   PetscInt          n=A->cmap->n;
350   PetscScalar       alpha = 1.0;
351   PetscScalar       beta = 0.0;
352   const PetscInt    *aj,*ai;
353   char              matdescra[6];
354 
355 
356   /* Variables not in MatMult_SeqAIJ. */
357   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
358 
359   PetscFunctionBegin;
360   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
361   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
362   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
363   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
364   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
365   aa   = a->a;  /* Nonzero elements stored row-by-row. */
366   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
367 
368   /* Call MKL sparse BLAS routine to do the MatMult. */
369   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
370 
371   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
372   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
373   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 #endif
377 
378 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
379 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
380 {
381   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
382   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
383   const PetscScalar *x;
384   PetscScalar       *y;
385   PetscErrorCode    ierr;
386   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
387   PetscObjectState  state;
388 
389   PetscFunctionBegin;
390 
391   /* If there are no nonzero entries, zero yy and return immediately. */
392   if(!a->nz) {
393     PetscInt i;
394     PetscInt m=A->rmap->n;
395     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
396     for (i=0; i<m; i++) {
397       y[i] = 0.0;
398     }
399     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
400     PetscFunctionReturn(0);
401   }
402 
403   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
404   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
405 
406   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
407    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
408    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
409   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
410   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
411     MatSeqAIJMKL_create_mkl_handle(A);
412   }
413 
414   /* Call MKL SpMV2 executor routine to do the MatMult. */
415   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
416   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
417 
418   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
420   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
424 
425 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
426 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
427 {
428   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
429   const PetscScalar *x;
430   PetscScalar       *y;
431   const MatScalar   *aa;
432   PetscErrorCode    ierr;
433   PetscInt          m=A->rmap->n;
434   PetscInt          n=A->cmap->n;
435   PetscScalar       alpha = 1.0;
436   PetscScalar       beta = 0.0;
437   const PetscInt    *aj,*ai;
438   char              matdescra[6];
439 
440   /* Variables not in MatMultTranspose_SeqAIJ. */
441   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
442 
443   PetscFunctionBegin;
444   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
445   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
446   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
447   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
448   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
449   aa   = a->a;  /* Nonzero elements stored row-by-row. */
450   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
451 
452   /* Call MKL sparse BLAS routine to do the MatMult. */
453   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
454 
455   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
456   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
457   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 #endif
461 
462 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
463 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
464 {
465   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
466   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
467   const PetscScalar *x;
468   PetscScalar       *y;
469   PetscErrorCode    ierr;
470   sparse_status_t   stat;
471   PetscObjectState  state;
472 
473   PetscFunctionBegin;
474 
475   /* If there are no nonzero entries, zero yy and return immediately. */
476   if(!a->nz) {
477     PetscInt i;
478     PetscInt n=A->cmap->n;
479     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
480     for (i=0; i<n; i++) {
481       y[i] = 0.0;
482     }
483     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
484     PetscFunctionReturn(0);
485   }
486 
487   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
488   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
489 
490   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
491    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
492    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
493   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
494   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
495     MatSeqAIJMKL_create_mkl_handle(A);
496   }
497 
498   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
499   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
500   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
501 
502   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
503   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
504   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
508 
509 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
510 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
513   const PetscScalar *x;
514   PetscScalar       *y,*z;
515   const MatScalar   *aa;
516   PetscErrorCode    ierr;
517   PetscInt          m=A->rmap->n;
518   PetscInt          n=A->cmap->n;
519   const PetscInt    *aj,*ai;
520   PetscInt          i;
521 
522   /* Variables not in MatMultAdd_SeqAIJ. */
523   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
524   PetscScalar       alpha = 1.0;
525   PetscScalar       beta;
526   char              matdescra[6];
527 
528   PetscFunctionBegin;
529   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
530   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
531 
532   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
533   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
534   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
535   aa   = a->a;  /* Nonzero elements stored row-by-row. */
536   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
537 
538   /* Call MKL sparse BLAS routine to do the MatMult. */
539   if (zz == yy) {
540     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
541     beta = 1.0;
542     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
543   } else {
544     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
545      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
546     beta = 0.0;
547     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
548     for (i=0; i<m; i++) {
549       z[i] += y[i];
550     }
551   }
552 
553   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
554   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
555   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 #endif
559 
560 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
561 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
562 {
563   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
564   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
565   const PetscScalar *x;
566   PetscScalar       *y,*z;
567   PetscErrorCode    ierr;
568   PetscInt          m=A->rmap->n;
569   PetscInt          i;
570 
571   /* Variables not in MatMultAdd_SeqAIJ. */
572   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
573   PetscObjectState  state;
574 
575   PetscFunctionBegin;
576 
577   /* If there are no nonzero entries, set zz = yy and return immediately. */
578   if(!a->nz) {
579     PetscInt i;
580     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
581     for (i=0; i<m; i++) {
582       z[i] = y[i];
583     }
584     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
585     PetscFunctionReturn(0);
586   }
587 
588   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
589   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
590 
591   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
592    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
593    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
594   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
595   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
596     MatSeqAIJMKL_create_mkl_handle(A);
597   }
598 
599   /* Call MKL sparse BLAS routine to do the MatMult. */
600   if (zz == yy) {
601     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
602      * with alpha and beta both set to 1.0. */
603     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
604     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
605   } else {
606     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
607      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
608     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
609     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
610     for (i=0; i<m; i++) {
611       z[i] += y[i];
612     }
613   }
614 
615   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
616   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
617   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
618   PetscFunctionReturn(0);
619 }
620 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
621 
622 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
623 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
624 {
625   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
626   const PetscScalar *x;
627   PetscScalar       *y,*z;
628   const MatScalar   *aa;
629   PetscErrorCode    ierr;
630   PetscInt          m=A->rmap->n;
631   PetscInt          n=A->cmap->n;
632   const PetscInt    *aj,*ai;
633   PetscInt          i;
634 
635   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
636   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
637   PetscScalar       alpha = 1.0;
638   PetscScalar       beta;
639   char              matdescra[6];
640 
641   PetscFunctionBegin;
642   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
643   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
644 
645   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
646   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
647   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
648   aa   = a->a;  /* Nonzero elements stored row-by-row. */
649   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
650 
651   /* Call MKL sparse BLAS routine to do the MatMult. */
652   if (zz == yy) {
653     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
654     beta = 1.0;
655     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
656   } else {
657     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
658      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
659     beta = 0.0;
660     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
661     for (i=0; i<n; i++) {
662       z[i] += y[i];
663     }
664   }
665 
666   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
667   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 #endif
672 
673 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
674 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
675 {
676   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
677   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
678   const PetscScalar *x;
679   PetscScalar       *y,*z;
680   PetscErrorCode    ierr;
681   PetscInt          n=A->cmap->n;
682   PetscInt          i;
683   PetscObjectState  state;
684 
685   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
686   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
687 
688   PetscFunctionBegin;
689 
690   /* If there are no nonzero entries, set zz = yy and return immediately. */
691   if(!a->nz) {
692     PetscInt i;
693     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
694     for (i=0; i<n; i++) {
695       z[i] = y[i];
696     }
697     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
698     PetscFunctionReturn(0);
699   }
700 
701   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
703 
704   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
705    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
706    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
707   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
708   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
709     MatSeqAIJMKL_create_mkl_handle(A);
710   }
711 
712   /* Call MKL sparse BLAS routine to do the MatMult. */
713   if (zz == yy) {
714     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
715      * with alpha and beta both set to 1.0. */
716     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
717     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
718   } else {
719     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
720      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
721     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
722     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
723     for (i=0; i<n; i++) {
724       z[i] += y[i];
725     }
726   }
727 
728   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
729   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
730   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
734 
735 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
736 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
737  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
738  * For releases of MKL prior to version 18, update 2:
739  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
740  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
741  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
742  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
743 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
744 {
745   Mat_SeqAIJMKL    *a, *b;
746   sparse_matrix_t  csrA, csrB, csrC;
747   PetscErrorCode   ierr;
748   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
749   PetscObjectState state;
750 
751   PetscFunctionBegin;
752   a = (Mat_SeqAIJMKL*)A->spptr;
753   b = (Mat_SeqAIJMKL*)B->spptr;
754   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
755   if (!a->sparse_optimized || a->state != state) {
756     MatSeqAIJMKL_create_mkl_handle(A);
757   }
758   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
759   if (!b->sparse_optimized || b->state != state) {
760     MatSeqAIJMKL_create_mkl_handle(B);
761   }
762   csrA = a->csrA;
763   csrB = b->csrA;
764 
765   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
766   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
767 
768   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
769 
770   PetscFunctionReturn(0);
771 }
772 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
773 
774 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
775 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
776 {
777   Mat_SeqAIJMKL       *a, *b, *c;
778   sparse_matrix_t     csrA, csrB, csrC;
779   PetscErrorCode      ierr;
780   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
781   struct matrix_descr descr_type_gen;
782   PetscObjectState    state;
783 
784   PetscFunctionBegin;
785   a = (Mat_SeqAIJMKL*)A->spptr;
786   b = (Mat_SeqAIJMKL*)B->spptr;
787   c = (Mat_SeqAIJMKL*)C->spptr;
788   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
789   if (!a->sparse_optimized || a->state != state) {
790     MatSeqAIJMKL_create_mkl_handle(A);
791   }
792   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
793   if (!b->sparse_optimized || b->state != state) {
794     MatSeqAIJMKL_create_mkl_handle(B);
795   }
796   csrA = a->csrA;
797   csrB = b->csrA;
798   csrC = c->csrA;
799   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
800 
801   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
802                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
803                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
804 
805   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
806 
807   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
808   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
809 
810   PetscFunctionReturn(0);
811 }
812 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
813 
814 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
815 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
816 {
817   Mat_SeqAIJMKL    *a, *b;
818   sparse_matrix_t  csrA, csrB, csrC;
819   PetscErrorCode   ierr;
820   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
821   PetscObjectState state;
822 
823   PetscFunctionBegin;
824   a = (Mat_SeqAIJMKL*)A->spptr;
825   b = (Mat_SeqAIJMKL*)B->spptr;
826   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
827   if (!a->sparse_optimized || a->state != state) {
828     MatSeqAIJMKL_create_mkl_handle(A);
829   }
830   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
831   if (!b->sparse_optimized || b->state != state) {
832     MatSeqAIJMKL_create_mkl_handle(B);
833   }
834   csrA = a->csrA;
835   csrB = b->csrA;
836 
837   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
838   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
839 
840   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
841 
842   PetscFunctionReturn(0);
843 }
844 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
845 
846 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
847 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
848 {
849   Mat_SeqAIJMKL       *a, *p, *c;
850   sparse_matrix_t     csrA, csrP, csrC;
851   PetscBool           set, flag;
852   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
853   struct matrix_descr descr_type_sym;
854   PetscObjectState    state;
855   PetscErrorCode      ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatIsSymmetricKnown(A,&set,&flag);
859   if (!set || (set && !flag)) {
860     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
861     PetscFunctionReturn(0);
862   }
863 
864   a = (Mat_SeqAIJMKL*)A->spptr;
865   p = (Mat_SeqAIJMKL*)P->spptr;
866   c = (Mat_SeqAIJMKL*)C->spptr;
867   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
868   if (!a->sparse_optimized || a->state != state) {
869     MatSeqAIJMKL_create_mkl_handle(A);
870   }
871   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
872   if (!p->sparse_optimized || p->state != state) {
873     MatSeqAIJMKL_create_mkl_handle(P);
874   }
875   csrA = a->csrA;
876   csrP = p->csrA;
877   csrC = c->csrA;
878   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
879   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
880   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
881 
882   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
883   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
884   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
885 
886   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
887   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
888 
889   PetscFunctionReturn(0);
890 }
891 #endif
892 
893 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
894 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
895 {
896   Mat_SeqAIJMKL       *a, *p;
897   sparse_matrix_t     csrA, csrP, csrC;
898   PetscBool           set, flag;
899   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
900   struct matrix_descr descr_type_sym;
901   PetscObjectState    state;
902   PetscErrorCode      ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatIsSymmetricKnown(A,&set,&flag);
906   if (!set || (set && !flag)) {
907     ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
908     PetscFunctionReturn(0);
909   }
910 
911   if (scall == MAT_REUSE_MATRIX) {
912     ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr);
913     PetscFunctionReturn(0);
914   }
915 
916   a = (Mat_SeqAIJMKL*)A->spptr;
917   p = (Mat_SeqAIJMKL*)P->spptr;
918   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
919   if (!a->sparse_optimized || a->state != state) {
920     MatSeqAIJMKL_create_mkl_handle(A);
921   }
922   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
923   if (!p->sparse_optimized || p->state != state) {
924     MatSeqAIJMKL_create_mkl_handle(P);
925   }
926   csrA = a->csrA;
927   csrP = p->csrA;
928   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
929   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
930   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
931 
932   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
933   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
934   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");
935 
936   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
937   ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
938 
939   PetscFunctionReturn(0);
940 }
941 #endif
942 
943 /* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */
944 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
945 
946 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
947  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
948  * routine, but can also be used to convert an assembled SeqAIJ matrix
949  * into a SeqAIJMKL one. */
950 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
951 {
952   PetscErrorCode ierr;
953   Mat            B = *newmat;
954   Mat_SeqAIJMKL  *aijmkl;
955   PetscBool      set;
956   PetscBool      sametype;
957 
958   PetscFunctionBegin;
959   if (reuse == MAT_INITIAL_MATRIX) {
960     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
961   }
962 
963   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
964   if (sametype) PetscFunctionReturn(0);
965 
966   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
967   B->spptr = (void*) aijmkl;
968 
969   /* Set function pointers for methods that we inherit from AIJ but override.
970    * We also parse some command line options below, since those determine some of the methods we point to. */
971   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
972   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
973   B->ops->destroy          = MatDestroy_SeqAIJMKL;
974 
975   aijmkl->sparse_optimized = PETSC_FALSE;
976 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
977   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
978 #else
979   aijmkl->no_SpMV2 = PETSC_TRUE;
980 #endif
981   aijmkl->eager_inspection = PETSC_FALSE;
982 
983   /* Parse command line options. */
984   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
985   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
986   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
987   ierr = PetscOptionsEnd();CHKERRQ(ierr);
988 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
989   if(!aijmkl->no_SpMV2) {
990     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
991     aijmkl->no_SpMV2 = PETSC_TRUE;
992   }
993 #endif
994 
995 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
996   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
997   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
998   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
999   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1000   B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1001 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1002   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1003 #   if !defined(PETSC_USE_COMPLEX)
1004   B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
1005   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1006 #   endif
1007 # endif
1008   B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1009 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1010 
1011 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1012   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1013    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1014    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1015    * versions in which the older interface has not been deprecated, we use the old interface. */
1016   if (aijmkl->no_SpMV2) {
1017     B->ops->mult             = MatMult_SeqAIJMKL;
1018     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1019     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1020     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1021   }
1022 #endif
1023 
1024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
1026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
1027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
1028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
1029   if(!aijmkl->no_SpMV2) {
1030 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1031     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1032 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1033     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1034 #endif
1035     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1036 #endif
1037   }
1038 
1039   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1040   *newmat = B;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 /*@C
1045    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1046    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1047    routines from Intel MKL whenever possible.
1048    If the installed version of MKL supports the "SpMV2" sparse
1049    inspector-executor routines, then those are used by default.
1050    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1051    symmetric A) operations are currently supported.
1052    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1053 
1054    Collective
1055 
1056    Input Parameters:
1057 +  comm - MPI communicator, set to PETSC_COMM_SELF
1058 .  m - number of rows
1059 .  n - number of columns
1060 .  nz - number of nonzeros per row (same for all rows)
1061 -  nnz - array containing the number of nonzeros in the various rows
1062          (possibly different for each row) or NULL
1063 
1064    Output Parameter:
1065 .  A - the matrix
1066 
1067    Options Database Keys:
1068 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1069 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1070 
1071    Notes:
1072    If nnz is given then nz is ignored
1073 
1074    Level: intermediate
1075 
1076 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1077 @*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create an empty matrix, make it square-partitioned (sequential: local == global sizes),
   * set its type to trigger MatConvert_SeqAIJ_SeqAIJMKL(), and preallocate its AIJ storage. */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  /* Preallocation uses the SeqAIJ routine directly since AIJMKL shares the SeqAIJ storage format. */
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1089 
/* Type-registration constructor for MATSEQAIJMKL: builds a plain SeqAIJ matrix
 * and converts it in place, which installs the MKL-specific method table. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  /* MAT_INPLACE_MATRIX: A itself becomes the AIJMKL matrix; no second Mat is created. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1099