xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
/* Private data for the MATSEQAIJMKL matrix type; hangs off Mat->spptr and
 * augments the inherited MATSEQAIJ (CSR) storage with MKL-specific state. */
typedef struct {
  PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state; /* PetscObjectState of the Mat when the MKL handle was last built; used to detect stale handles. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Descriptor (type/fill mode/diag) passed alongside csrA to the executor routines. */
#endif
} Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61    * simply involves destroying the MKL sparse matrix handle and then freeing
62    * the spptr pointer. */
63   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
64 
65   if (aijmkl->sparse_optimized) {
66     sparse_status_t stat;
67     stat = mkl_sparse_destroy(aijmkl->csrA);
68     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
69   }
70 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
71   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
72 
73   /* Change the type of B to MATSEQAIJ. */
74   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
75 
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 
/* Destructor for MATSEQAIJMKL: releases MKL-specific state (the sparse matrix
 * handle and the spptr data), then reverts the matrix to MATSEQAIJ so the base
 * SeqAIJ destructor can free everything that remains. */
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    /* Only destroy the MKL handle if one was actually created/optimized. */
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
109 
110 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
111  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
112  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
113  * handle, creates a new one, and then calls mkl_sparse_optimize().
114  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
115  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
116  * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  /* The matrix is marked general; mode/diag are set for completeness (MKL only
   * consults them for symmetric/triangular matrix types). */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
    /* Hint to the inspector that the dominant operation is (many) non-transpose MatMults. */
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    /* Record the object state at handle creation so later operations can detect stale handles. */
    ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  } else {
    /* Empty or structure-only matrix: no handle is created; SpMV callers must check for NULL. */
    aijmkl->csrA = PETSC_NULL;
  }

  PetscFunctionReturn(0);
#endif
}
182 
183 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
184 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
/* Take an already created but empty matrix A and populate its nonzero structure from
 * the CSR arrays exported from an MKL sparse matrix handle, then convert A to
 * MATSEQAIJMKL and adopt 'csrA' as its handle (A takes ownership of csrA).
 * A NULL csrA denotes an empty matrix; A is then assembled with no entries. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  PetscFunctionBegin;
  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);

  /* Adopt the caller's MKL handle rather than building a fresh one from the AIJ arrays. */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  /* Record the object state so handle staleness can be detected later. */
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
243 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
244 
245 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
246  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
247  * MatMatMultNumeric(). */
248 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(0);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  /* Instead, insert the exported entries row by row through the SeqAIJ MatSetValues path. */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Record the (post-assembly) object state so the handle is not considered stale. */
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(0);
}
286 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
287 
288 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
289 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
290 {
291   PetscInt            i,j,k;
292   PetscInt            nrows,ncols;
293   PetscInt            nz;
294   PetscInt            *ai,*aj,*dummy;
295   PetscScalar         *aa;
296   PetscErrorCode      ierr;
297   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
298   sparse_status_t     stat;
299   sparse_index_base_t indexing;
300 
301   PetscFunctionBegin;
302   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
303 
304   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
305   if (!aijmkl->csrA) {
306     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
307     PetscFunctionReturn(0);
308   }
309 
310   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
311   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
312   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
313 
314   k = 0;
315   for (i=0; i<nrows; i++) {
316     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
317     nz = ai[i+1] - ai[i];
318     for (j=0; j<nz; j++) {
319       if (aa) {
320         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr);
321       } else {
322         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
323       }
324       k++;
325     }
326     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
327   }
328   PetscFunctionReturn(0);
329 }
330 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
331 
332 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
333 {
334   PetscErrorCode ierr;
335   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
336   Mat_SeqAIJMKL  *aijmkl_dest;
337 
338   PetscFunctionBegin;
339   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
340   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
341   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
342   aijmkl_dest->sparse_optimized = PETSC_FALSE;
343   if (aijmkl->eager_inspection) {
344     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
345   }
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
350 {
351   PetscErrorCode  ierr;
352   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
353   Mat_SeqAIJMKL   *aijmkl;
354 
355   PetscFunctionBegin;
356   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
357 
358   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
359    * extra information and some different methods, call the AssemblyEnd
360    * routine for a MATSEQAIJ.
361    * I'm not sure if this is the best way to do this, but it avoids
362    * a lot of code duplication. */
363   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
364   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
365 
366   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
367    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
368   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
369   if (aijmkl->eager_inspection) {
370     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
371   }
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
377 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
378 {
379   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
380   const PetscScalar *x;
381   PetscScalar       *y;
382   const MatScalar   *aa;
383   PetscErrorCode    ierr;
384   PetscInt          m = A->rmap->n;
385   PetscInt          n = A->cmap->n;
386   PetscScalar       alpha = 1.0;
387   PetscScalar       beta = 0.0;
388   const PetscInt    *aj,*ai;
389   char              matdescra[6];
390 
391   /* Variables not in MatMult_SeqAIJ. */
392   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
393 
394   PetscFunctionBegin;
395   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
396   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
397   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
398   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
400   aa   = a->a;  /* Nonzero elements stored row-by-row. */
401   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
402 
403   /* Call MKL sparse BLAS routine to do the MatMult. */
404   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
405 
406   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
407   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
408   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 #endif
412 
413 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
414 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
415 {
416   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
417   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
418   const PetscScalar *x;
419   PetscScalar       *y;
420   PetscErrorCode    ierr;
421   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
422   PetscObjectState  state;
423 
424   PetscFunctionBegin;
425 
426   /* If there are no nonzero entries, zero yy and return immediately. */
427   if (!a->nz) {
428     PetscInt i;
429     PetscInt m=A->rmap->n;
430     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
431     for (i=0; i<m; i++) {
432       y[i] = 0.0;
433     }
434     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
435     PetscFunctionReturn(0);
436   }
437 
438   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
439   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
440 
441   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
442    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
443    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
444   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
445   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
446     MatSeqAIJMKL_create_mkl_handle(A);
447   }
448 
449   /* Call MKL SpMV2 executor routine to do the MatMult. */
450   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
451   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
452 
453   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
454   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
455   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
459 
460 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
461 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
462 {
463   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
464   const PetscScalar *x;
465   PetscScalar       *y;
466   const MatScalar   *aa;
467   PetscErrorCode    ierr;
468   PetscInt          m = A->rmap->n;
469   PetscInt          n = A->cmap->n;
470   PetscScalar       alpha = 1.0;
471   PetscScalar       beta = 0.0;
472   const PetscInt    *aj,*ai;
473   char              matdescra[6];
474 
475   /* Variables not in MatMultTranspose_SeqAIJ. */
476   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
477 
478   PetscFunctionBegin;
479   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
480   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
481   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
482   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
483   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
484   aa   = a->a;  /* Nonzero elements stored row-by-row. */
485   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
486 
487   /* Call MKL sparse BLAS routine to do the MatMult. */
488   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
489 
490   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
491   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
492   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 #endif
496 
497 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
498 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
499 {
500   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
501   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
502   const PetscScalar *x;
503   PetscScalar       *y;
504   PetscErrorCode    ierr;
505   sparse_status_t   stat;
506   PetscObjectState  state;
507 
508   PetscFunctionBegin;
509 
510   /* If there are no nonzero entries, zero yy and return immediately. */
511   if (!a->nz) {
512     PetscInt i;
513     PetscInt n=A->cmap->n;
514     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
515     for (i=0; i<n; i++) {
516       y[i] = 0.0;
517     }
518     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
519     PetscFunctionReturn(0);
520   }
521 
522   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
523   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
524 
525   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
526    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
527    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
528   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
529   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
530     MatSeqAIJMKL_create_mkl_handle(A);
531   }
532 
533   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
534   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
535   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
536 
537   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
538   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
543 
544 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
545 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
546 {
547   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
548   const PetscScalar *x;
549   PetscScalar       *y,*z;
550   const MatScalar   *aa;
551   PetscErrorCode    ierr;
552   PetscInt          m = A->rmap->n;
553   PetscInt          n = A->cmap->n;
554   const PetscInt    *aj,*ai;
555   PetscInt          i;
556 
557   /* Variables not in MatMultAdd_SeqAIJ. */
558   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
559   PetscScalar       alpha = 1.0;
560   PetscScalar       beta;
561   char              matdescra[6];
562 
563   PetscFunctionBegin;
564   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
565   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
566 
567   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
568   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
569   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
570   aa   = a->a;  /* Nonzero elements stored row-by-row. */
571   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
572 
573   /* Call MKL sparse BLAS routine to do the MatMult. */
574   if (zz == yy) {
575     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
576     beta = 1.0;
577     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
578   } else {
579     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
580      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
581     beta = 0.0;
582     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
583     for (i=0; i<m; i++) {
584       z[i] += y[i];
585     }
586   }
587 
588   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
589   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
590   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 #endif
594 
595 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
596 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
597 {
598   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
599   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
600   const PetscScalar *x;
601   PetscScalar       *y,*z;
602   PetscErrorCode    ierr;
603   PetscInt          m = A->rmap->n;
604   PetscInt          i;
605 
606   /* Variables not in MatMultAdd_SeqAIJ. */
607   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
608   PetscObjectState  state;
609 
610   PetscFunctionBegin;
611 
612   /* If there are no nonzero entries, set zz = yy and return immediately. */
613   if (!a->nz) {
614     PetscInt i;
615     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
616     for (i=0; i<m; i++) {
617       z[i] = y[i];
618     }
619     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
620     PetscFunctionReturn(0);
621   }
622 
623   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
624   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
625 
626   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
627    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
628    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
629   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
630   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
631     MatSeqAIJMKL_create_mkl_handle(A);
632   }
633 
634   /* Call MKL sparse BLAS routine to do the MatMult. */
635   if (zz == yy) {
636     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
637      * with alpha and beta both set to 1.0. */
638     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
639     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
640   } else {
641     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
642      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
643     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
644     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
645     for (i=0; i<m; i++) {
646       z[i] += y[i];
647     }
648   }
649 
650   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
651   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
652   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
656 
657 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
658 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
659 {
660   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
661   const PetscScalar *x;
662   PetscScalar       *y,*z;
663   const MatScalar   *aa;
664   PetscErrorCode    ierr;
665   PetscInt          m = A->rmap->n;
666   PetscInt          n = A->cmap->n;
667   const PetscInt    *aj,*ai;
668   PetscInt          i;
669 
670   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
671   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
672   PetscScalar       alpha = 1.0;
673   PetscScalar       beta;
674   char              matdescra[6];
675 
676   PetscFunctionBegin;
677   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
678   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
679 
680   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
681   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
682   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
683   aa   = a->a;  /* Nonzero elements stored row-by-row. */
684   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
685 
686   /* Call MKL sparse BLAS routine to do the MatMult. */
687   if (zz == yy) {
688     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
689     beta = 1.0;
690     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
691   } else {
692     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
693      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
694     beta = 0.0;
695     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
696     for (i=0; i<n; i++) {
697       z[i] += y[i];
698     }
699   }
700 
701   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
702   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
703   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 #endif
707 
708 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
709 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
710 {
711   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
712   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
713   const PetscScalar *x;
714   PetscScalar       *y,*z;
715   PetscErrorCode    ierr;
716   PetscInt          n = A->cmap->n;
717   PetscInt          i;
718   PetscObjectState  state;
719 
720   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
721   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
722 
723   PetscFunctionBegin;
724 
725   /* If there are no nonzero entries, set zz = yy and return immediately. */
726   if (!a->nz) {
727     PetscInt i;
728     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
729     for (i=0; i<n; i++) {
730       z[i] = y[i];
731     }
732     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
733     PetscFunctionReturn(0);
734   }
735 
736   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
737   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
738 
739   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
740    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
741    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
742   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
743   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
744     MatSeqAIJMKL_create_mkl_handle(A);
745   }
746 
747   /* Call MKL sparse BLAS routine to do the MatMult. */
748   if (zz == yy) {
749     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
750      * with alpha and beta both set to 1.0. */
751     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
752     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
753   } else {
754     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
755      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
756     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
757     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
758     for (i=0; i<n; i++) {
759       z[i] += y[i];
760     }
761   }
762 
763   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
764   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
765   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
766   PetscFunctionReturn(0);
767 }
768 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
769 
770 /* -------------------------- MatProduct code -------------------------- */
771 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
772 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
773 {
774   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
775   sparse_matrix_t     csrA,csrB,csrC;
776   PetscInt            nrows,ncols;
777   PetscErrorCode      ierr;
778   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
779   struct matrix_descr descr_type_gen;
780   PetscObjectState    state;
781 
782   PetscFunctionBegin;
783   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
784    * not handle sparse matrices with zero rows or columns. */
785   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
786   else nrows = A->cmap->N;
787   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
788   else ncols = B->rmap->N;
789 
790   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
791   if (!a->sparse_optimized || a->state != state) {
792     MatSeqAIJMKL_create_mkl_handle(A);
793   }
794   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
795   if (!b->sparse_optimized || b->state != state) {
796     MatSeqAIJMKL_create_mkl_handle(B);
797   }
798   csrA = a->csrA;
799   csrB = b->csrA;
800   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
801 
802   if (csrA && csrB) {
803     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
804     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
805   } else {
806     csrC = PETSC_NULL;
807   }
808 
809   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
810 
811   PetscFunctionReturn(0);
812 }
813 
814 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
815 {
816   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
817   sparse_matrix_t     csrA, csrB, csrC;
818   PetscErrorCode      ierr;
819   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
820   struct matrix_descr descr_type_gen;
821   PetscObjectState    state;
822 
823   PetscFunctionBegin;
824   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
825   if (!a->sparse_optimized || a->state != state) {
826     MatSeqAIJMKL_create_mkl_handle(A);
827   }
828   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
829   if (!b->sparse_optimized || b->state != state) {
830     MatSeqAIJMKL_create_mkl_handle(B);
831   }
832   csrA = a->csrA;
833   csrB = b->csrA;
834   csrC = c->csrA;
835   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
836 
837   if (csrA && csrB) {
838     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
839     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
840   } else {
841     csrC = PETSC_NULL;
842   }
843 
844   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
845   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
846 
847   PetscFunctionReturn(0);
848 }
849 
/* Symbolic stage of C = A*B: thin wrapper over the private routine with both operands
 * non-transposed. The 'fill' estimate is unused; MKL sizes the result itself. */
PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
858 
/* Numeric stage of C = A*B: thin wrapper over the private routine with both operands
 * non-transposed. */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
867 
/* Numeric stage of C = A^T*B: thin wrapper over the private routine with A transposed. */
PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
876 
/* Symbolic stage of C = A^T*B: thin wrapper over the private routine with A transposed.
 * The 'fill' estimate is unused; MKL sizes the result itself. */
PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
885 
/* Symbolic stage of C = A*B^T: thin wrapper over the private routine with B transposed.
 * The 'fill' estimate is unused; MKL sizes the result itself. */
PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
894 
/* Numeric stage of C = A*B^T: thin wrapper over the private routine with B transposed. */
PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
903 
/* MatProduct numeric callback for the AtB product type: unpacks A and B from the
 * MatProduct attached to C and forwards to the A^T*B numeric routine. */
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
914 
/* MatProduct symbolic callback for the AtB product type: runs the A^T*B symbolic stage
 * and installs the matching numeric callback on C. */
static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;
  PetscReal      fill = product->fill;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
  /* The numeric stage must use the same transpose convention, so set it here. */
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(0);
}
927 
928 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
929 {
930   Mat                 Ct;
931   Vec                 zeros;
932   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
933   sparse_matrix_t     csrA, csrP, csrC;
934   PetscBool           set, flag;
935   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
936   struct matrix_descr descr_type_sym;
937   PetscObjectState    state;
938   PetscErrorCode      ierr;
939 
940   PetscFunctionBegin;
941   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
942   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
943 
944   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
945   if (!a->sparse_optimized || a->state != state) {
946     MatSeqAIJMKL_create_mkl_handle(A);
947   }
948   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
949   if (!p->sparse_optimized || p->state != state) {
950     MatSeqAIJMKL_create_mkl_handle(P);
951   }
952   csrA = a->csrA;
953   csrP = p->csrA;
954   csrC = c->csrA;
955   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
956   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
957   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
958 
959   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
960   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
961   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
962 
963   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
964    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
965    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
966    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
967    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
968    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
969    * the full matrix. */
970   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
971   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
972   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
973   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
974   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
975   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
976   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
977   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
978   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
979   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
980   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
981   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
982   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
987 {
988   Mat_Product         *product = C->product;
989   Mat                 A = product->A,P = product->B;
990   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
991   sparse_matrix_t     csrA,csrP,csrC;
992   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
993   struct matrix_descr descr_type_sym;
994   PetscObjectState    state;
995   PetscErrorCode      ierr;
996 
997   PetscFunctionBegin;
998   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
999   if (!a->sparse_optimized || a->state != state) {
1000     MatSeqAIJMKL_create_mkl_handle(A);
1001   }
1002   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1003   if (!p->sparse_optimized || p->state != state) {
1004     MatSeqAIJMKL_create_mkl_handle(P);
1005   }
1006   csrA = a->csrA;
1007   csrP = p->csrA;
1008   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1009   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1010   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1011 
1012   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1013   if (csrP && csrA) {
1014     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1015     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1016   } else {
1017     csrC = PETSC_NULL;
1018   }
1019 
1020   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1021    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1022    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1023    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1024    * is guaranteed. */
1025   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1026 
1027   C->ops->productnumeric = MatProductNumeric_PtAP;
1028   PetscFunctionReturn(0);
1029 }
1030 
/* Configure C for the AB product type: use the generic symbolic driver with the
 * AIJMKL-specific matmult symbolic routine. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(0);
}
1038 
/* Configure C for the AtB product type: install the AIJMKL symbolic callback, which
 * in turn installs the matching numeric callback. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(0);
}
1045 
/* Configure C for the ABt product type. Note that the SeqAIJ (not AIJMKL) symbolic
 * routine is used here; the AIJMKL numeric routine is installed in MatConvert. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
1053 
1054 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1055 {
1056   PetscErrorCode ierr;
1057   Mat_Product    *product = C->product;
1058   Mat            A = product->A;
1059   PetscBool      set, flag;
1060 
1061   PetscFunctionBegin;
1062   if (PetscDefined(USE_COMPLEX)) {
1063     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
1064      * We do this in several other locations in this file. This works for the time being, but these
1065      * routines are considered unsafe and may be removed from the MatProduct code in the future.
1066      * TODO: Add proper MATSEQAIJMKL implementations */
1067     C->ops->productsymbolic = NULL;
1068   } else {
1069     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1070     ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1071     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1072     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
1073     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1074      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
/* Configure C for the RARt product type: no AIJMKL-specific implementation exists,
 * so fall back to the generic (unsafe) path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(0);
}
1085 
/* Configure C for the ABC (triple) product type: no AIJMKL-specific implementation
 * exists, so fall back to the generic (unsafe) path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(0);
}
1092 
/* Top-level MatProduct configuration dispatch for SEQAIJMKL: routes to the per-product-type
 * helper based on the product type attached to C. Unknown types are deliberately left
 * unconfigured. */
PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_PtAP:
    ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_RARt:
    ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABC:
    ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
    break;
  default:
    /* Other product types: leave ops untouched so generic handling applies. */
    break;
  }
  PetscFunctionReturn(0);
}
1123 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1124 /* ------------------------ End MatProduct code ------------------------ */
1125 
1126 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1127  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1128  * routine, but can also be used to convert an assembled SeqAIJ matrix
1129  * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* Converts a SeqAIJ matrix into a SeqAIJMKL one by attaching the Mat_SeqAIJMKL context
   * and overriding the operations that have MKL-accelerated implementations. */
  PetscErrorCode ierr;
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;       /* NOTE(review): written by PetscOptionsBool() below but never read. */
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  /* If A is already of the target type, there is nothing to do. */
  ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
  if (sametype) PetscFunctionReturn(0);

  ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* SpMV2 was requested (i.e. not disabled) but this MKL cannot provide it; warn and disable. */
  if (!aijmkl->no_SpMV2) {
    ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");CHKERRQ(ierr);
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
#   if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
#   else
  /* mkl_sparse_sypr() does not support complex matrices; see MatPtAPNumeric_..._SymmetricReal(). */
  B->ops->ptapnumeric             = NULL;
#   endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);

  ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
1213 
1214 /*@C
1215    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1216    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1217    routines from Intel MKL whenever possible.
1218    If the installed version of MKL supports the "SpMV2" sparse
1219    inspector-executor routines, then those are used by default.
1220    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1221    symmetric A) operations are currently supported.
1222    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1223 
1224    Collective
1225 
1226    Input Parameters:
1227 +  comm - MPI communicator, set to PETSC_COMM_SELF
1228 .  m - number of rows
1229 .  n - number of columns
1230 .  nz - number of nonzeros per row (same for all rows)
1231 -  nnz - array containing the number of nonzeros in the various rows
1232          (possibly different for each row) or NULL
1233 
1234    Output Parameter:
1235 .  A - the matrix
1236 
1237    Options Database Keys:
1238 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1239 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1240 
1241    Notes:
1242    If nnz is given then nz is ignored
1243 
1244    Level: intermediate
1245 
1246 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1247 @*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  /* See the manpage comment above for the full contract; this simply creates, sizes,
   * types, and preallocates the matrix. */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  /* Preallocation uses nnz if provided, otherwise nz for every row. */
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1259 
/* Type registration entry point: build A as a plain SEQAIJ matrix, then convert it
 * in place to SEQAIJMKL. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1269