xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision db04c2a0c0f07ba9eda1fcdd8ad55f8a122c8773)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   if (!aijmkl->no_SpMV2) {
61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70 
71   if (aijmkl->sparse_optimized) {
72     sparse_status_t stat;
73     stat = mkl_sparse_destroy(aijmkl->csrA);
74     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
75   }
76 #endif
77   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
78 
79   /* Change the type of B to MATSEQAIJ. */
80   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
81 
82   *newmat = B;
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
87 {
88   PetscErrorCode ierr;
89   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
90 
91   PetscFunctionBegin;
92 
93   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
94    * spptr pointer. */
95   if (aijmkl) {
96     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
97 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
98     if (aijmkl->sparse_optimized) {
99       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
100       stat = mkl_sparse_destroy(aijmkl->csrA);
101       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
102     }
103 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
104     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
105   }
106 
107   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
108    * to destroy everything that remains. */
109   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
110   /* Note that I don't call MatSetType().  I believe this is because that
111    * is only to be called when *building* a matrix.  I could be wrong, but
112    * that is how things work for the SuperLU matrix class. */
113   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
118  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
119  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
120  * handle, creates a new one, and then calls mkl_sparse_optimize().
121  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
122  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
123  * an unoptimized matrix handle here. */
124 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
125 {
126 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
127   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
128    * does nothing. We make it callable anyway in this case because it cuts
129    * down on littering the code with #ifdefs. */
130   PetscFunctionBegin;
131   PetscFunctionReturn(0);
132 #else
133   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
134   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
135   PetscInt         m,n;
136   MatScalar        *aa;
137   PetscInt         *aj,*ai;
138   sparse_status_t  stat;
139   PetscErrorCode   ierr;
140 
141   PetscFunctionBegin;
142 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
143   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
144    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
145    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
146    * mkl_sparse_optimize() later. */
147   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
148 #endif
149 
150   if (aijmkl->sparse_optimized) {
151     /* Matrix has been previously assembled and optimized. Must destroy old
152      * matrix handle before running the optimization step again. */
153     stat = mkl_sparse_destroy(aijmkl->csrA);
154     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
155   }
156   aijmkl->sparse_optimized = PETSC_FALSE;
157 
158   /* Now perform the SpMV2 setup and matrix optimization. */
159   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
160   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
161   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
162   m = A->rmap->n;
163   n = A->cmap->n;
164   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
167   if (a->nz && aa && !A->structure_only) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
172     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
173     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
174     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
176     if (!aijmkl->no_SpMV2) {
177       stat = mkl_sparse_optimize(aijmkl->csrA);
178       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
179     }
180     aijmkl->sparse_optimized = PETSC_TRUE;
181     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
182   } else {
183     aijmkl->csrA = PETSC_NULL;
184   }
185 
186   PetscFunctionReturn(0);
187 #endif
188 }
189 
190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
192 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
193 {
194   PetscErrorCode      ierr;
195   sparse_status_t     stat;
196   sparse_index_base_t indexing;
197   PetscInt            m,n;
198   PetscInt            *aj,*ai,*dummy;
199   MatScalar           *aa;
200   Mat_SeqAIJMKL       *aijmkl;
201 
202   if (csrA) {
203     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
205     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
206     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
207   } else {
208     aj = ai = PETSC_NULL;
209     aa = PETSC_NULL;
210   }
211 
212   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
213   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216    * they will be destroyed when the MKL handle is destroyed.
217    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218   if (csrA) {
219     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
220   } else {
221     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
222     ierr = MatSetUp(A);CHKERRQ(ierr);
223     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   }
226 
227   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
228    * Now turn it into a MATSEQAIJMKL. */
229   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
230 
231   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
232   aijmkl->csrA = csrA;
233 
234   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
235    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
236    * and just need to be able to run the MKL optimization step. */
237   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
238   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
239   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
240   if (csrA) {
241     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
242     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
243     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
244     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
245   }
246   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
250 
251 
252 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
253  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
254  * MatMatMultNumeric(). */
255 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
256 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
257 {
258   PetscInt            i;
259   PetscInt            nrows,ncols;
260   PetscInt            nz;
261   PetscInt            *ai,*aj,*dummy;
262   PetscScalar         *aa;
263   PetscErrorCode      ierr;
264   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
265   sparse_status_t     stat;
266   sparse_index_base_t indexing;
267 
268   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
269   if (!aijmkl->csrA) PetscFunctionReturn(0);
270 
271   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
272   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
273   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
274 
275   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
276    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
277   for (i=0; i<nrows; i++) {
278     nz = ai[i+1] - ai[i];
279     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
280   }
281 
282   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
283   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284 
285   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
286   /* We mark our matrix as having a valid, optimized MKL handle.
287    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
288   aijmkl->sparse_optimized = PETSC_TRUE;
289   PetscFunctionReturn(0);
290 }
291 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
292 
293 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
294 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
295 {
296   PetscInt            i,j,k;
297   PetscInt            nrows,ncols;
298   PetscInt            nz;
299   PetscInt            *ai,*aj,*dummy;
300   PetscScalar         *aa;
301   PetscErrorCode      ierr;
302   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
303   sparse_status_t     stat;
304   sparse_index_base_t indexing;
305 
306   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
307 
308   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
309   if (!aijmkl->csrA) {
310     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
311     PetscFunctionReturn(0);
312   }
313 
314   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
315   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
316   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
317 
318   k = 0;
319   for (i=0; i<nrows; i++) {
320     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
321     nz = ai[i+1] - ai[i];
322     for (j=0; j<nz; j++) {
323       if (aa) {
324         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],aa[k]);CHKERRQ(ierr);
325       } else {
326         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
327       }
328       k++;
329     }
330     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
331   }
332   PetscFunctionReturn(0);
333 }
334 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
335 
336 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
337 {
338   PetscErrorCode ierr;
339   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
340   Mat_SeqAIJMKL  *aijmkl_dest;
341 
342   PetscFunctionBegin;
343   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
344   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
345   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
346   aijmkl_dest->sparse_optimized = PETSC_FALSE;
347   if (aijmkl->eager_inspection) {
348     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
349   }
350   PetscFunctionReturn(0);
351 }
352 
353 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
354 {
355   PetscErrorCode  ierr;
356   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
357   Mat_SeqAIJMKL   *aijmkl;
358 
359   PetscFunctionBegin;
360   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
361 
362   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
363    * extra information and some different methods, call the AssemblyEnd
364    * routine for a MATSEQAIJ.
365    * I'm not sure if this is the best way to do this, but it avoids
366    * a lot of code duplication. */
367   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
368   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
369 
370   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
371    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
372   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
373   if (aijmkl->eager_inspection) {
374     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
375   }
376 
377   PetscFunctionReturn(0);
378 }
379 
380 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
381 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
382 {
383   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
384   const PetscScalar *x;
385   PetscScalar       *y;
386   const MatScalar   *aa;
387   PetscErrorCode    ierr;
388   PetscInt          m = A->rmap->n;
389   PetscInt          n = A->cmap->n;
390   PetscScalar       alpha = 1.0;
391   PetscScalar       beta = 0.0;
392   const PetscInt    *aj,*ai;
393   char              matdescra[6];
394 
395 
396   /* Variables not in MatMult_SeqAIJ. */
397   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
398 
399   PetscFunctionBegin;
400   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
401   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
402   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
403   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
404   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
405   aa   = a->a;  /* Nonzero elements stored row-by-row. */
406   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
407 
408   /* Call MKL sparse BLAS routine to do the MatMult. */
409   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
410 
411   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
412   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
413   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 #endif
417 
418 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
419 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
420 {
421   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
422   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
423   const PetscScalar *x;
424   PetscScalar       *y;
425   PetscErrorCode    ierr;
426   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
427   PetscObjectState  state;
428 
429   PetscFunctionBegin;
430 
431   /* If there are no nonzero entries, zero yy and return immediately. */
432   if(!a->nz) {
433     PetscInt i;
434     PetscInt m=A->rmap->n;
435     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
436     for (i=0; i<m; i++) {
437       y[i] = 0.0;
438     }
439     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
440     PetscFunctionReturn(0);
441   }
442 
443   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
444   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
445 
446   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
447    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
448    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
449   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
450   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
451     MatSeqAIJMKL_create_mkl_handle(A);
452   }
453 
454   /* Call MKL SpMV2 executor routine to do the MatMult. */
455   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
456   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
457 
458   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
459   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
460   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
464 
465 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
466 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
467 {
468   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
469   const PetscScalar *x;
470   PetscScalar       *y;
471   const MatScalar   *aa;
472   PetscErrorCode    ierr;
473   PetscInt          m = A->rmap->n;
474   PetscInt          n = A->cmap->n;
475   PetscScalar       alpha = 1.0;
476   PetscScalar       beta = 0.0;
477   const PetscInt    *aj,*ai;
478   char              matdescra[6];
479 
480   /* Variables not in MatMultTranspose_SeqAIJ. */
481   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
482 
483   PetscFunctionBegin;
484   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
485   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
486   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
488   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
489   aa   = a->a;  /* Nonzero elements stored row-by-row. */
490   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
491 
492   /* Call MKL sparse BLAS routine to do the MatMult. */
493   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
494 
495   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
496   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
497   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 #endif
501 
502 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
503 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
504 {
505   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
506   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
507   const PetscScalar *x;
508   PetscScalar       *y;
509   PetscErrorCode    ierr;
510   sparse_status_t   stat;
511   PetscObjectState  state;
512 
513   PetscFunctionBegin;
514 
515   /* If there are no nonzero entries, zero yy and return immediately. */
516   if(!a->nz) {
517     PetscInt i;
518     PetscInt n=A->cmap->n;
519     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
520     for (i=0; i<n; i++) {
521       y[i] = 0.0;
522     }
523     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
524     PetscFunctionReturn(0);
525   }
526 
527   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
528   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
529 
530   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
531    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
532    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
533   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
534   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
535     MatSeqAIJMKL_create_mkl_handle(A);
536   }
537 
538   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
539   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
540   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
541 
542   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
543   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
544   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
548 
549 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
550 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
551 {
552   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
553   const PetscScalar *x;
554   PetscScalar       *y,*z;
555   const MatScalar   *aa;
556   PetscErrorCode    ierr;
557   PetscInt          m = A->rmap->n;
558   PetscInt          n = A->cmap->n;
559   const PetscInt    *aj,*ai;
560   PetscInt          i;
561 
562   /* Variables not in MatMultAdd_SeqAIJ. */
563   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
564   PetscScalar       alpha = 1.0;
565   PetscScalar       beta;
566   char              matdescra[6];
567 
568   PetscFunctionBegin;
569   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
570   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
571 
572   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
574   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
575   aa   = a->a;  /* Nonzero elements stored row-by-row. */
576   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
577 
578   /* Call MKL sparse BLAS routine to do the MatMult. */
579   if (zz == yy) {
580     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
581     beta = 1.0;
582     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
583   } else {
584     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
585      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
586     beta = 0.0;
587     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
588     for (i=0; i<m; i++) {
589       z[i] += y[i];
590     }
591   }
592 
593   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
594   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
595   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 #endif
599 
600 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
601 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
602 {
603   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
604   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
605   const PetscScalar *x;
606   PetscScalar       *y,*z;
607   PetscErrorCode    ierr;
608   PetscInt          m = A->rmap->n;
609   PetscInt          i;
610 
611   /* Variables not in MatMultAdd_SeqAIJ. */
612   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
613   PetscObjectState  state;
614 
615   PetscFunctionBegin;
616 
617   /* If there are no nonzero entries, set zz = yy and return immediately. */
618   if(!a->nz) {
619     PetscInt i;
620     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
621     for (i=0; i<m; i++) {
622       z[i] = y[i];
623     }
624     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
625     PetscFunctionReturn(0);
626   }
627 
628   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
629   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
630 
631   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
632    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
633    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
634   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
635   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
636     MatSeqAIJMKL_create_mkl_handle(A);
637   }
638 
639   /* Call MKL sparse BLAS routine to do the MatMult. */
640   if (zz == yy) {
641     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
642      * with alpha and beta both set to 1.0. */
643     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
644     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
645   } else {
646     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
647      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
648     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
649     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
650     for (i=0; i<m; i++) {
651       z[i] += y[i];
652     }
653   }
654 
655   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
656   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
657   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
661 
662 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
663 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
664 {
665   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
666   const PetscScalar *x;
667   PetscScalar       *y,*z;
668   const MatScalar   *aa;
669   PetscErrorCode    ierr;
670   PetscInt          m = A->rmap->n;
671   PetscInt          n = A->cmap->n;
672   const PetscInt    *aj,*ai;
673   PetscInt          i;
674 
675   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
676   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
677   PetscScalar       alpha = 1.0;
678   PetscScalar       beta;
679   char              matdescra[6];
680 
681   PetscFunctionBegin;
682   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
683   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
684 
685   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
686   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
687   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
688   aa   = a->a;  /* Nonzero elements stored row-by-row. */
689   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
690 
691   /* Call MKL sparse BLAS routine to do the MatMult. */
692   if (zz == yy) {
693     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
694     beta = 1.0;
695     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
696   } else {
697     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
698      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
699     beta = 0.0;
700     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
701     for (i=0; i<n; i++) {
702       z[i] += y[i];
703     }
704   }
705 
706   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
707   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
708   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 #endif
712 
713 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
714 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
715 {
716   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
717   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
718   const PetscScalar *x;
719   PetscScalar       *y,*z;
720   PetscErrorCode    ierr;
721   PetscInt          n = A->cmap->n;
722   PetscInt          i;
723   PetscObjectState  state;
724 
725   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
726   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
727 
728   PetscFunctionBegin;
729 
730   /* If there are no nonzero entries, set zz = yy and return immediately. */
731   if(!a->nz) {
732     PetscInt i;
733     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
734     for (i=0; i<n; i++) {
735       z[i] = y[i];
736     }
737     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
738     PetscFunctionReturn(0);
739   }
740 
741   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
742   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
743 
744   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
745    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
746    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
747   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
748   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
749     MatSeqAIJMKL_create_mkl_handle(A);
750   }
751 
752   /* Call MKL sparse BLAS routine to do the MatMult. */
753   if (zz == yy) {
754     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
755      * with alpha and beta both set to 1.0. */
756     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
757     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
758   } else {
759     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
760      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
761     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
762     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
763     for (i=0; i<n; i++) {
764       z[i] += y[i];
765     }
766   }
767 
768   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
769   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
770   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
771   PetscFunctionReturn(0);
772 }
773 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
774 
775 /* -------------------------- MatProduct code -------------------------- */
776 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
777 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
778 {
779   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
780   sparse_matrix_t     csrA,csrB,csrC;
781   PetscInt            nrows,ncols;
782   PetscErrorCode      ierr;
783   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
784   struct matrix_descr descr_type_gen;
785   PetscObjectState    state;
786 
787   PetscFunctionBegin;
788   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
789    * not handle sparse matrices with zero rows or columns. */
790   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
791   else nrows = A->cmap->N;
792   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
793   else ncols = B->rmap->N;
794 
795   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
796   if (!a->sparse_optimized || a->state != state) {
797     MatSeqAIJMKL_create_mkl_handle(A);
798   }
799   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
800   if (!b->sparse_optimized || b->state != state) {
801     MatSeqAIJMKL_create_mkl_handle(B);
802   }
803   csrA = a->csrA;
804   csrB = b->csrA;
805   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
806 
807   if (csrA && csrB) {
808     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
809     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
810   } else {
811     csrC = PETSC_NULL;
812   }
813 
814   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
815 
816   PetscFunctionReturn(0);
817 }
818 
819 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
820 {
821   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
822   sparse_matrix_t     csrA, csrB, csrC;
823   PetscErrorCode      ierr;
824   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
825   struct matrix_descr descr_type_gen;
826   PetscObjectState    state;
827 
828   PetscFunctionBegin;
829   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
830   if (!a->sparse_optimized || a->state != state) {
831     MatSeqAIJMKL_create_mkl_handle(A);
832   }
833   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
834   if (!b->sparse_optimized || b->state != state) {
835     MatSeqAIJMKL_create_mkl_handle(B);
836   }
837   csrA = a->csrA;
838   csrB = b->csrA;
839   csrC = c->csrA;
840   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
841 
842   if (csrA && csrB) {
843     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
844     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
845   } else {
846     csrC = PETSC_NULL;
847   }
848 
849   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
850   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
851 
852   PetscFunctionReturn(0);
853 }
854 
855 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
856 {
857   PetscErrorCode ierr;
858 
859   PetscFunctionBegin;
860   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
865 {
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
874 {
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
879   PetscFunctionReturn(0);
880 }
881 
882 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
883 {
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
892 {
893   PetscErrorCode ierr;
894 
895   PetscFunctionBegin;
896   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
901 {
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
906   PetscFunctionReturn(0);
907 }
908 
909 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
910 {
911   PetscErrorCode ierr;
912   Mat_Product    *product = C->product;
913   Mat            A = product->A,B = product->B;
914 
915   PetscFunctionBegin;
916   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
921 {
922   PetscErrorCode ierr;
923   Mat_Product    *product = C->product;
924   Mat            A = product->A,B = product->B;
925   PetscReal      fill = product->fill;
926 
927   PetscFunctionBegin;
928   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
929   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
934 {
935   Mat                 Ct;
936   Vec                 zeros;
937   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
938   sparse_matrix_t     csrA, csrP, csrC;
939   PetscBool           set, flag;
940   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
941   struct matrix_descr descr_type_sym;
942   PetscObjectState    state;
943   PetscErrorCode      ierr;
944 
945   PetscFunctionBegin;
946   ierr = MatIsSymmetricKnown(A,&set,&flag);
947   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
948 
949   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
950   if (!a->sparse_optimized || a->state != state) {
951     MatSeqAIJMKL_create_mkl_handle(A);
952   }
953   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
954   if (!p->sparse_optimized || p->state != state) {
955     MatSeqAIJMKL_create_mkl_handle(P);
956   }
957   csrA = a->csrA;
958   csrP = p->csrA;
959   csrC = c->csrA;
960   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
961   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
962   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
963 
964   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
965   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
966   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
967 
968   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
969    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
970    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
971    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
972    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
973    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
974    * the full matrix. */
975   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
976   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
977   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
978   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
979   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
980   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
981   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
982   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
983   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
984   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
985   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
986   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
987   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
992 {
993   Mat_Product         *product = C->product;
994   Mat                 A = product->A,P = product->B;
995   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
996   sparse_matrix_t     csrA,csrP,csrC;
997   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
998   struct matrix_descr descr_type_sym;
999   PetscObjectState    state;
1000   PetscErrorCode      ierr;
1001 
1002   PetscFunctionBegin;
1003   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1004   if (!a->sparse_optimized || a->state != state) {
1005     MatSeqAIJMKL_create_mkl_handle(A);
1006   }
1007   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1008   if (!p->sparse_optimized || p->state != state) {
1009     MatSeqAIJMKL_create_mkl_handle(P);
1010   }
1011   csrA = a->csrA;
1012   csrP = p->csrA;
1013   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1014   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1015   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1016 
1017   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1018   if (csrP && csrA) {
1019     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1020     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1021   } else {
1022     csrC = PETSC_NULL;
1023   }
1024 
1025   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1026    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1027    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1028    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1029    * is guaranteed. */
1030   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1031 
1032   C->ops->productnumeric = MatProductNumeric_PtAP;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1037 {
1038   PetscFunctionBegin;
1039   C->ops->productsymbolic = MatProductSymbolic_AB;
1040   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1045 {
1046   PetscFunctionBegin;
1047   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1052 {
1053   PetscFunctionBegin;
1054   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1055   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1060 {
1061   PetscErrorCode ierr;
1062   Mat_Product    *product = C->product;
1063   Mat            A = product->A;
1064   PetscBool      set, flag;
1065 
1066   PetscFunctionBegin;
1067 #if defined(PETSC_USE_COMPLEX)
1068   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1069    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1070    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1071    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1072   C->ops->productsymbolic = NULL;
1073 #else
1074   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1075   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1076   if (set && flag) {
1077     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1078     PetscFunctionReturn(0);
1079   } else {
1080     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1081   }
1082   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1083    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1084 #endif
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1089 {
1090   PetscFunctionBegin;
1091   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1096 {
1097   PetscFunctionBegin;
1098   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1103 {
1104   PetscErrorCode ierr;
1105   Mat_Product    *product = C->product;
1106 
1107   PetscFunctionBegin;
1108   switch (product->type) {
1109   case MATPRODUCT_AB:
1110     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1111     break;
1112   case MATPRODUCT_AtB:
1113     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1114     break;
1115   case MATPRODUCT_ABt:
1116     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1117     break;
1118   case MATPRODUCT_PtAP:
1119     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1120     break;
1121   case MATPRODUCT_RARt:
1122     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1123     break;
1124   case MATPRODUCT_ABC:
1125     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1126     break;
1127   default:
1128     break;
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1133 /* ------------------------ End MatProduct code ------------------------ */
1134 
1135 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1136  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1137  * routine, but can also be used to convert an assembled SeqAIJ matrix
1138  * into a SeqAIJMKL one. */
1139 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1140 {
1141   PetscErrorCode ierr;
1142   Mat            B = *newmat;
1143   Mat_SeqAIJMKL  *aijmkl;
1144   PetscBool      set;
1145   PetscBool      sametype;
1146 
1147   PetscFunctionBegin;
1148   if (reuse == MAT_INITIAL_MATRIX) {
1149     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1150   }
1151 
1152   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1153   if (sametype) PetscFunctionReturn(0);
1154 
1155   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1156   B->spptr = (void*) aijmkl;
1157 
1158   /* Set function pointers for methods that we inherit from AIJ but override.
1159    * We also parse some command line options below, since those determine some of the methods we point to. */
1160   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1161   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1162   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1163 
1164   aijmkl->sparse_optimized = PETSC_FALSE;
1165 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1166   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1167 #else
1168   aijmkl->no_SpMV2 = PETSC_TRUE;
1169 #endif
1170   aijmkl->eager_inspection = PETSC_FALSE;
1171 
1172   /* Parse command line options. */
1173   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1174   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1175   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1176   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1177 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1178   if(!aijmkl->no_SpMV2) {
1179     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1180     aijmkl->no_SpMV2 = PETSC_TRUE;
1181   }
1182 #endif
1183 
1184 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1185   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1186   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1187   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1188   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1189 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1190   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1191   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1192   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1194   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1195 #   if !defined(PETSC_USE_COMPLEX)
1196   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1197 #   else
1198   B->ops->ptapnumeric             = NULL;
1199 #   endif
1200 # endif
1201 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1202 
1203 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1204   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1205    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1206    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1207    * versions in which the older interface has not been deprecated, we use the old interface. */
1208   if (aijmkl->no_SpMV2) {
1209     B->ops->mult             = MatMult_SeqAIJMKL;
1210     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1211     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1212     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1213   }
1214 #endif
1215 
1216   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1217 
1218   if(!aijmkl->no_SpMV2) {
1219 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1220 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1221     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1222 #endif
1223 #endif
1224   }
1225 
1226   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1227   *newmat = B;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 /*@C
1232    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1233    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1234    routines from Intel MKL whenever possible.
1235    If the installed version of MKL supports the "SpMV2" sparse
1236    inspector-executor routines, then those are used by default.
1237    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1238    symmetric A) operations are currently supported.
1239    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1240 
1241    Collective
1242 
1243    Input Parameters:
1244 +  comm - MPI communicator, set to PETSC_COMM_SELF
1245 .  m - number of rows
1246 .  n - number of columns
1247 .  nz - number of nonzeros per row (same for all rows)
1248 -  nnz - array containing the number of nonzeros in the various rows
1249          (possibly different for each row) or NULL
1250 
1251    Output Parameter:
1252 .  A - the matrix
1253 
1254    Options Database Keys:
1255 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1256 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1257 
1258    Notes:
1259    If nnz is given then nz is ignored
1260 
1261    Level: intermediate
1262 
1263 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1264 @*/
1265 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1266 {
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1271   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1272   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1273   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1278 {
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1283   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286