xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 501b8e3370a48794b2be620e06e4ce1b1481277a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
10 {
11   Mat            B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
15   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
16   PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
17   PetscCall(MatDestroy(&B));
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
22 {
23   Mat            B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
27   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
28   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",NULL));
29   PetscFunctionReturn(0);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
35    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
36    and MATMPIAIJ otherwise.  As a result, for single process communicators,
37   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
43 
44   Developer Notes:
45     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ type also automatically switches over to use inodes when
46    enough of them exist.
47 
48   Level: beginner
49 
50 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
51 M*/
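/*
   A minimal usage sketch for MATAIJ (illustrative only; the global size n and the
   preallocation figures are placeholders, not values taken from this file): create the
   matrix, set its type, call both preallocation routines as recommended above, insert
   values, and assemble.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n));
     PetscCall(MatSetType(A,MATAIJ));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatSeqAIJSetPreallocation(A,5,NULL));        // takes effect on a single-process communicator
     PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,2,NULL)); // takes effect on a multi-process communicator
     ... MatSetValues() calls ...
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/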
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
57    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
58    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
64 
65   Level: beginner
66 
67 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
71 {
72   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
73 
74   PetscFunctionBegin;
75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
76   A->boundtocpu = flg;
77 #endif
78   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
79   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
80 
81   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
82    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
83    * to differ from the parent matrix. */
84   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
85   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
86 
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
97     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
103 {
104   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
105   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
106   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
107   const PetscInt  *ia,*ib;
108   const MatScalar *aa,*bb,*aav,*bav;
109   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
110   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
111 
112   PetscFunctionBegin;
113   *keptrows = NULL;
114 
115   ia   = a->i;
116   ib   = b->i;
117   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
118   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
119   for (i=0; i<m; i++) {
120     na = ia[i+1] - ia[i];
121     nb = ib[i+1] - ib[i];
122     if (!na && !nb) {
123       cnt++;
124       goto ok1;
125     }
126     aa = aav + ia[i];
127     for (j=0; j<na; j++) {
128       if (aa[j] != 0.0) goto ok1;
129     }
130     bb = bav + ib[i];
131     for (j=0; j <nb; j++) {
132       if (bb[j] != 0.0) goto ok1;
133     }
134     cnt++;
135 ok1:;
136   }
137   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
138   if (!n0rows) {
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
140     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
141     PetscFunctionReturn(0);
142   }
143   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
144   cnt  = 0;
145   for (i=0; i<m; i++) {
146     na = ia[i+1] - ia[i];
147     nb = ib[i+1] - ib[i];
148     if (!na && !nb) continue;
149     aa = aav + ia[i];
150     for (j=0; j<na;j++) {
151       if (aa[j] != 0.0) {
152         rows[cnt++] = rstart + i;
153         goto ok2;
154       }
155     }
156     bb = bav + ib[i];
157     for (j=0; j<nb; j++) {
158       if (bb[j] != 0.0) {
159         rows[cnt++] = rstart + i;
160         goto ok2;
161       }
162     }
163 ok2:;
164   }
165   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
167   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
172 {
173   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
174   PetscBool         cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y,&cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A,D,is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y,D,is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
189   PetscInt       i,rstart,nrows,*rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
194   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
195   for (i=0; i<nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
201 {
202   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
203   PetscInt          i,m,n,*garray = aij->garray;
204   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
205   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
206   PetscReal         *work;
207   const PetscScalar *dummy;
208 
209   PetscFunctionBegin;
210   PetscCall(MatGetSize(A,&m,&n));
211   PetscCall(PetscCalloc1(n,&work));
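  /* Obtain and immediately restore the value arrays; the returned pointers are not used, but
     the calls ensure that any device (GPU) copies of the A and B values are synchronized to
     the host before a_aij->a and b_aij->a are read directly below. */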
212   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
213   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
214   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
215   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
216   if (type == NORM_2) {
217     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
218       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
219     }
220     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
221       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
222     }
223   } else if (type == NORM_1) {
224     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
225       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
226     }
227     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
228       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
229     }
230   } else if (type == NORM_INFINITY) {
231     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
232       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
233     }
234     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
235       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
236     }
237   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
238     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
239       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
240     }
241     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
242       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
243     }
244   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
245     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
246       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
247     }
248     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
249       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
250     }
251   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
252   if (type == NORM_INFINITY) {
253     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
254   } else {
255     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
256   }
257   PetscCall(PetscFree(work));
258   if (type == NORM_2) {
259     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
260   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
261     for (i=0; i<n; i++) reductions[i] /= m;
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
267 {
268   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
269   IS              sis,gis;
270   const PetscInt  *isis,*igis;
271   PetscInt        n,*iis,nsis,ngis,rstart,i;
272 
273   PetscFunctionBegin;
274   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
275   PetscCall(MatFindNonzeroRows(a->B,&gis));
276   PetscCall(ISGetSize(gis,&ngis));
277   PetscCall(ISGetSize(sis,&nsis));
278   PetscCall(ISGetIndices(sis,&isis));
279   PetscCall(ISGetIndices(gis,&igis));
280 
281   PetscCall(PetscMalloc1(ngis+nsis,&iis));
282   PetscCall(PetscArraycpy(iis,igis,ngis));
283   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
284   n    = ngis + nsis;
285   PetscCall(PetscSortRemoveDupsInt(&n,iis));
286   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
287   for (i=0; i<n; i++) iis[i] += rstart;
288   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
289 
290   PetscCall(ISRestoreIndices(sis,&isis));
291   PetscCall(ISRestoreIndices(gis,&igis));
292   PetscCall(ISDestroy(&sis));
293   PetscCall(ISDestroy(&gis));
294   PetscFunctionReturn(0);
295 }
296 
297 /*
298   Local utility routine that creates a mapping from the global column
299 number to the local number in the off-diagonal part of the local
300 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
301 a slightly higher hash-table cost; without it, it is not scalable (each process
302 has an order-N integer array) but is fast to access.
303 */
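/*
  A small worked example of the mapping built below (the column numbers are illustrative,
  not taken from any particular run): if garray = {3, 7, 12}, i.e. the off-diagonal block B
  on this process uses global columns 3, 7, and 12, then colmap records

     global column  3 -> local column 0   (stored as the value 1)
     global column  7 -> local column 1   (stored as the value 2)
     global column 12 -> local column 2   (stored as the value 3)

  The stored values are shifted by +1 (and the table keys as well when PETSC_USE_CTABLE is
  defined) so that a lookup result of 0 means "this global column does not appear in B on
  this process"; callers subtract 1 after the lookup.
*/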
304 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscInt       n = aij->B->cmap->n,i;
308 
309   PetscFunctionBegin;
310   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
311 #if defined(PETSC_USE_CTABLE)
312   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
313   for (i=0; i<n; i++) {
314     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
315   }
316 #else
317   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
318   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
319   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
320 #endif
321   PetscFunctionReturn(0);
322 }
323 
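/*
  The two macros below insert a single (row,col,value) into the diagonal block A or the
  off-diagonal block B of an MPIAIJ matrix: a short binary search narrows the candidate
  range [low,high) within the row, a linear scan then either updates an existing entry
  (adding or overwriting according to addv) or inserts a new one, reallocating the row
  storage with MatSeqXAIJReallocateAIJ() when the row is full and nonew permits new
  nonzero locations.
*/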
324 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
325 { \
326     if (col <= lastcol1)  low1 = 0;     \
327     else                 high1 = nrow1; \
328     lastcol1 = col;\
329     while (high1-low1 > 5) { \
330       t = (low1+high1)/2; \
331       if (rp1[t] > col) high1 = t; \
332       else              low1  = t; \
333     } \
334       for (_i=low1; _i<high1; _i++) { \
335         if (rp1[_i] > col) break; \
336         if (rp1[_i] == col) { \
337           if (addv == ADD_VALUES) { \
338             ap1[_i] += value;   \
339             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
340             (void)PetscLogFlops(1.0);   \
341            } \
342           else                    ap1[_i] = value; \
343           goto a_noinsert; \
344         } \
345       }  \
346       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
347       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
348       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
349       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
350       N = nrow1++ - 1; a->nz++; high1++; \
351       /* shift up all the later entries in this row */ \
352       PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
353       PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
354       rp1[_i] = col;  \
355       ap1[_i] = value;  \
356       A->nonzerostate++;\
357       a_noinsert: ; \
358       ailen[row] = nrow1; \
359 }
360 
361 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
362   { \
363     if (col <= lastcol2) low2 = 0;                        \
364     else high2 = nrow2;                                   \
365     lastcol2 = col;                                       \
366     while (high2-low2 > 5) {                              \
367       t = (low2+high2)/2;                                 \
368       if (rp2[t] > col) high2 = t;                        \
369       else             low2  = t;                         \
370     }                                                     \
371     for (_i=low2; _i<high2; _i++) {                       \
372       if (rp2[_i] > col) break;                           \
373       if (rp2[_i] == col) {                               \
374         if (addv == ADD_VALUES) {                         \
375           ap2[_i] += value;                               \
376           (void)PetscLogFlops(1.0);                       \
377         }                                                 \
378         else                    ap2[_i] = value;          \
379         goto b_noinsert;                                  \
380       }                                                   \
381     }                                                     \
382     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
383     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
384     PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
385     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
386     N = nrow2++ - 1; b->nz++; high2++;                    \
387     /* shift up all the later entries in this row */      \
388     PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
389     PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
390     rp2[_i] = col;                                        \
391     ap2[_i] = value;                                      \
392     B->nonzerostate++;                                    \
393     b_noinsert: ;                                         \
394     bilen[row] = nrow2;                                   \
395   }
396 
397 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
398 {
399   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
400   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
401   PetscInt       l,*garray = mat->garray,diag;
402   PetscScalar    *aa,*ba;
403 
404   PetscFunctionBegin;
405   /* code only works for square matrices A */
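  /* v[] holds the entire local row in global column order: first the off-diagonal (B)
     entries whose global columns lie to the left of the diagonal block, then the diagonal
     block (A) entries, and finally the remaining off-diagonal (B) entries to the right */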
406 
407   /* find size of row to the left of the diagonal part */
408   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
409   row  = row - diag;
410   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
411     if (garray[b->j[b->i[row]+l]] > diag) break;
412   }
413   if (l) {
414     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
415     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
416     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
417   }
418 
419   /* diagonal part */
420   if (a->i[row+1]-a->i[row]) {
421     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
422     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
423     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
424   }
425 
426   /* right of diagonal part */
427   if (b->i[row+1]-b->i[row]-l) {
428     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
429     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
430     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
431   }
432   PetscFunctionReturn(0);
433 }
434 
435 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
436 {
437   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
438   PetscScalar    value = 0.0;
439   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
440   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
441   PetscBool      roworiented = aij->roworiented;
442 
443   /* Some Variables required in the macro */
444   Mat        A                    = aij->A;
445   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
446   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
447   PetscBool  ignorezeroentries    = a->ignorezeroentries;
448   Mat        B                    = aij->B;
449   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
450   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
451   MatScalar  *aa,*ba;
452   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
453   PetscInt   nonew;
454   MatScalar  *ap1,*ap2;
455 
456   PetscFunctionBegin;
457   PetscCall(MatSeqAIJGetArray(A,&aa));
458   PetscCall(MatSeqAIJGetArray(B,&ba));
459   for (i=0; i<m; i++) {
460     if (im[i] < 0) continue;
461     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
462     if (im[i] >= rstart && im[i] < rend) {
463       row      = im[i] - rstart;
464       lastcol1 = -1;
465       rp1      = aj + ai[row];
466       ap1      = aa + ai[row];
467       rmax1    = aimax[row];
468       nrow1    = ailen[row];
469       low1     = 0;
470       high1    = nrow1;
471       lastcol2 = -1;
472       rp2      = bj + bi[row];
473       ap2      = ba + bi[row];
474       rmax2    = bimax[row];
475       nrow2    = bilen[row];
476       low2     = 0;
477       high2    = nrow2;
478 
479       for (j=0; j<n; j++) {
480         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
481         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
482         if (in[j] >= cstart && in[j] < cend) {
483           col   = in[j] - cstart;
484           nonew = a->nonew;
485           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
486         } else if (in[j] < 0) {
487           continue;
488         } else {
489           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
490           if (mat->was_assembled) {
491             if (!aij->colmap) {
492               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
493             }
494 #if defined(PETSC_USE_CTABLE)
495             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
496             col--;
497 #else
498             col = aij->colmap[in[j]] - 1;
499 #endif
500             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
501               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
502               col  =  in[j];
503               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
504               B        = aij->B;
505               b        = (Mat_SeqAIJ*)B->data;
506               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
507               rp2      = bj + bi[row];
508               ap2      = ba + bi[row];
509               rmax2    = bimax[row];
510               nrow2    = bilen[row];
511               low2     = 0;
512               high2    = nrow2;
513               bm       = aij->B->rmap->n;
514               ba       = b->a;
515             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
516               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
517                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
518               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
519             }
520           } else col = in[j];
521           nonew = b->nonew;
522           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
523         }
524       }
525     } else {
526       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
527       if (!aij->donotstash) {
528         mat->assembled = PETSC_FALSE;
529         if (roworiented) {
530           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
531         } else {
532           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
533         }
534       }
535     }
536   }
537   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
538   PetscCall(MatSeqAIJRestoreArray(B,&ba));
539   PetscFunctionReturn(0);
540 }
541 
542 /*
543     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
544     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
545     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
546 */
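/*
    A small worked example of the split performed below (the numbers are illustrative): with
    a local column ownership range [cstart,cend) = [2,5), a row with global columns {0, 3, 6}
    contributes the single local column 1 (= 3 - cstart) to the diagonal part (ailen = 1) and
    the global columns {0, 6} to the off-diagonal part (bilen = 2).
*/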
547 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
548 {
549   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
550   Mat            A           = aij->A; /* diagonal part of the matrix */
551   Mat            B           = aij->B; /* offdiagonal part of the matrix */
552   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
553   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
554   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
555   PetscInt       *ailen      = a->ilen,*aj = a->j;
556   PetscInt       *bilen      = b->ilen,*bj = b->j;
557   PetscInt       am          = aij->A->rmap->n,j;
558   PetscInt       diag_so_far = 0,dnz;
559   PetscInt       offd_so_far = 0,onz;
560 
561   PetscFunctionBegin;
562   /* Iterate over all rows of the matrix */
563   for (j=0; j<am; j++) {
564     dnz = onz = 0;
565     /*  Iterate over all non-zero columns of the current row */
566     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
567       /* If column is in the diagonal */
568       if (mat_j[col] >= cstart && mat_j[col] < cend) {
569         aj[diag_so_far++] = mat_j[col] - cstart;
570         dnz++;
571       } else { /* off-diagonal entries */
572         bj[offd_so_far++] = mat_j[col];
573         onz++;
574       }
575     }
576     ailen[j] = dnz;
577     bilen[j] = onz;
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 /*
583     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
584     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
585     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
586     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
587     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
588 */
589 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
590 {
591   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
592   Mat            A      = aij->A; /* diagonal part of the matrix */
593   Mat            B      = aij->B; /* offdiagonal part of the matrix */
594   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
595   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
596   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
598   PetscInt       *ailen = a->ilen,*aj = a->j;
599   PetscInt       *bilen = b->ilen,*bj = b->j;
600   PetscInt       am     = aij->A->rmap->n,j;
601   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
602   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
603   PetscScalar    *aa = a->a,*ba = b->a;
604 
605   PetscFunctionBegin;
606   /* Iterate over all rows of the matrix */
607   for (j=0; j<am; j++) {
608     dnz_row = onz_row = 0;
609     rowstart_offd = full_offd_i[j];
610     rowstart_diag = full_diag_i[j];
611     /*  Iterate over all non-zero columns of the current row */
612     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
613       /* If column is in the diagonal */
614       if (mat_j[col] >= cstart && mat_j[col] < cend) {
615         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
616         aa[rowstart_diag+dnz_row] = mat_a[col];
617         dnz_row++;
618       } else { /* off-diagonal entries */
619         bj[rowstart_offd+onz_row] = mat_j[col];
620         ba[rowstart_offd+onz_row] = mat_a[col];
621         onz_row++;
622       }
623     }
624     ailen[j] = dnz_row;
625     bilen[j] = onz_row;
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
634   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
635 
636   PetscFunctionBegin;
637   for (i=0; i<m; i++) {
638     if (idxm[i] < 0) continue; /* negative row */
639     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
640     if (idxm[i] >= rstart && idxm[i] < rend) {
641       row = idxm[i] - rstart;
642       for (j=0; j<n; j++) {
643         if (idxn[j] < 0) continue; /* negative column */
644         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
645         if (idxn[j] >= cstart && idxn[j] < cend) {
646           col  = idxn[j] - cstart;
647           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
648         } else {
649           if (!aij->colmap) {
650             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
651           }
652 #if defined(PETSC_USE_CTABLE)
653           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
654           col--;
655 #else
656           col = aij->colmap[idxn[j]] - 1;
657 #endif
658           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
659           else {
660             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
672   PetscInt       nstash,reallocs;
673 
674   PetscFunctionBegin;
675   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
676 
677   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
678   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
679   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
680   PetscFunctionReturn(0);
681 }
682 
683 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
684 {
685   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
686   PetscMPIInt    n;
687   PetscInt       i,j,rstart,ncols,flg;
688   PetscInt       *row,*col;
689   PetscBool      other_disassembled;
690   PetscScalar    *val;
691 
692   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
693 
694   PetscFunctionBegin;
695   if (!aij->donotstash && !mat->nooffprocentries) {
696     while (1) {
697       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
698       if (!flg) break;
699 
700       for (i=0; i<n;) {
701         /* Now identify the consecutive vals belonging to the same row */
702         for (j=i,rstart=row[j]; j<n; j++) {
703           if (row[j] != rstart) break;
704         }
705         if (j < n) ncols = j-i;
706         else       ncols = n-i;
707         /* Now assemble all these values with a single function call */
708         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
709         i    = j;
710       }
711     }
712     PetscCall(MatStashScatterEnd_Private(&mat->stash));
713   }
714 #if defined(PETSC_HAVE_DEVICE)
715   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
716   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
717   if (mat->boundtocpu) {
718     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
719     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
720   }
721 #endif
722   PetscCall(MatAssemblyBegin(aij->A,mode));
723   PetscCall(MatAssemblyEnd(aij->A,mode));
724 
725   /* determine if any process has disassembled; if so, we must
726      also disassemble ourselves so that we may reassemble */
727   /*
728      if the nonzero structure of submatrix B cannot change, then we know that
729      no process disassembled and thus we can skip this step
730   */
731   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
732     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
733     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
734       PetscCall(MatDisAssemble_MPIAIJ(mat));
735     }
736   }
737   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
738     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
739   }
740   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
741 #if defined(PETSC_HAVE_DEVICE)
742   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
743 #endif
744   PetscCall(MatAssemblyBegin(aij->B,mode));
745   PetscCall(MatAssemblyEnd(aij->B,mode));
746 
747   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
748 
749   aij->rowvalues = NULL;
750 
751   PetscCall(VecDestroy(&aij->diag));
752 
753   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
754   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
755     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
756     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
757   }
758 #if defined(PETSC_HAVE_DEVICE)
759   mat->offloadmask = PETSC_OFFLOAD_BOTH;
760 #endif
761   PetscFunctionReturn(0);
762 }
763 
764 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
765 {
766   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
767 
768   PetscFunctionBegin;
769   PetscCall(MatZeroEntries(l->A));
770   PetscCall(MatZeroEntries(l->B));
771   PetscFunctionReturn(0);
772 }
773 
774 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
775 {
776   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
777   PetscObjectState sA, sB;
778   PetscInt        *lrows;
779   PetscInt         r, len;
780   PetscBool        cong, lch, gch;
781 
782   PetscFunctionBegin;
783   /* get locally owned rows */
784   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
785   PetscCall(MatHasCongruentLayouts(A,&cong));
786   /* fix right hand side if needed */
787   if (x && b) {
788     const PetscScalar *xx;
789     PetscScalar       *bb;
790 
791     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
792     PetscCall(VecGetArrayRead(x, &xx));
793     PetscCall(VecGetArray(b, &bb));
794     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
795     PetscCall(VecRestoreArrayRead(x, &xx));
796     PetscCall(VecRestoreArray(b, &bb));
797   }
798 
799   sA = mat->A->nonzerostate;
800   sB = mat->B->nonzerostate;
801 
802   if (diag != 0.0 && cong) {
803     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
804     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
805   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
806     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
807     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
808     PetscInt   nnwA, nnwB;
809     PetscBool  nnzA, nnzB;
810 
811     nnwA = aijA->nonew;
812     nnwB = aijB->nonew;
813     nnzA = aijA->keepnonzeropattern;
814     nnzB = aijB->keepnonzeropattern;
815     if (!nnzA) {
816       PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
817       aijA->nonew = 0;
818     }
819     if (!nnzB) {
820       PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
821       aijB->nonew = 0;
822     }
823     /* Must zero here before the next loop */
824     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
825     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
826     for (r = 0; r < len; ++r) {
827       const PetscInt row = lrows[r] + A->rmap->rstart;
828       if (row >= A->cmap->N) continue;
829       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
830     }
831     aijA->nonew = nnwA;
832     aijB->nonew = nnwB;
833   } else {
834     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
835     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
836   }
837   PetscCall(PetscFree(lrows));
838   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
839   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
840 
841   /* reduce nonzerostate */
842   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
843   PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
844   if (gch) A->nonzerostate++;
845   PetscFunctionReturn(0);
846 }
847 
848 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
849 {
850   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
851   PetscMPIInt       n = A->rmap->n;
852   PetscInt          i,j,r,m,len = 0;
853   PetscInt          *lrows,*owners = A->rmap->range;
854   PetscMPIInt       p = 0;
855   PetscSFNode       *rrows;
856   PetscSF           sf;
857   const PetscScalar *xx;
858   PetscScalar       *bb,*mask,*aij_a;
859   Vec               xmask,lmask;
860   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
861   const PetscInt    *aj, *ii,*ridx;
862   PetscScalar       *aa;
863 
864   PetscFunctionBegin;
865   /* Create SF where leaves are input rows and roots are owned rows */
866   PetscCall(PetscMalloc1(n, &lrows));
867   for (r = 0; r < n; ++r) lrows[r] = -1;
868   PetscCall(PetscMalloc1(N, &rrows));
869   for (r = 0; r < N; ++r) {
870     const PetscInt idx   = rows[r];
871     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
872     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
873       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
874     }
875     rrows[r].rank  = p;
876     rrows[r].index = rows[r] - owners[p];
877   }
878   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
879   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
880   /* Collect flags for rows to be zeroed */
881   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
882   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
883   PetscCall(PetscSFDestroy(&sf));
884   /* Compress and put in row numbers */
885   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
886   /* zero diagonal part of matrix */
887   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
888   /* handle off diagonal part of matrix */
889   PetscCall(MatCreateVecs(A,&xmask,NULL));
890   PetscCall(VecDuplicate(l->lvec,&lmask));
891   PetscCall(VecGetArray(xmask,&bb));
892   for (i=0; i<len; i++) bb[lrows[i]] = 1;
893   PetscCall(VecRestoreArray(xmask,&bb));
894   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
895   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
896   PetscCall(VecDestroy(&xmask));
897   if (x && b) { /* this code is buggy when the row and column layout don't match */
898     PetscBool cong;
899 
900     PetscCall(MatHasCongruentLayouts(A,&cong));
901     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
902     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
903     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
904     PetscCall(VecGetArrayRead(l->lvec,&xx));
905     PetscCall(VecGetArray(b,&bb));
906   }
907   PetscCall(VecGetArray(lmask,&mask));
908   /* remove zeroed rows of off diagonal matrix */
909   PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
913   }
914   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij_a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij_a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x && b) {
951     PetscCall(VecRestoreArray(b,&bb));
952     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
953   }
954   PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
955   PetscCall(VecRestoreArray(lmask,&mask));
956   PetscCall(VecDestroy(&lmask));
957   PetscCall(PetscFree(lrows));
958 
959   /* only change matrix nonzero state if pattern was allowed to be changed */
960   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
961     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
962     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
963   }
964   PetscFunctionReturn(0);
965 }
966 
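/*
   y = A x for the MPIAIJ format: begin the scatter that gathers the needed off-process
   entries of x into a->lvec, overlap it with the product against the local diagonal
   block, then complete the scatter and add in the off-diagonal block contribution.
*/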
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscInt       nt;
971   VecScatter     Mvctx = a->Mvctx;
972 
973   PetscFunctionBegin;
974   PetscCall(VecGetLocalSize(xx,&nt));
975   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
976   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
977   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
978   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
979   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986 
987   PetscFunctionBegin;
988   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
993 {
994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
995   VecScatter     Mvctx = a->Mvctx;
996 
997   PetscFunctionBegin;
998   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
999   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
1000   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1001   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1006 {
1007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1008 
1009   PetscFunctionBegin;
1010   /* do nondiagonal part */
1011   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1012   /* do local part */
1013   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1014   /* add partial results together */
1015   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1021 {
1022   MPI_Comm       comm;
1023   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1024   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1025   IS             Me,Notme;
1026   PetscInt       M,N,first,last,*notme,i;
1027   PetscBool      lf;
1028   PetscMPIInt    size;
1029 
1030   PetscFunctionBegin;
1031   /* Easy test: the diagonal blocks must be transposes of each other */
1032   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1033   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
1034   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1035   if (!*f) PetscFunctionReturn(0);
1036   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1037   PetscCallMPI(MPI_Comm_size(comm,&size));
1038   if (size == 1) PetscFunctionReturn(0);
1039 
1040   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1041   PetscCall(MatGetSize(Amat,&M,&N));
1042   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
1043   PetscCall(PetscMalloc1(N-last+first,&notme));
1044   for (i=0; i<first; i++) notme[i] = i;
1045   for (i=last; i<M; i++) notme[i-last+first] = i;
1046   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
1047   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
1048   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
1049   Aoff = Aoffs[0];
1050   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
1051   Boff = Boffs[0];
1052   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
1053   PetscCall(MatDestroyMatrices(1,&Aoffs));
1054   PetscCall(MatDestroyMatrices(1,&Boffs));
1055   PetscCall(ISDestroy(&Me));
1056   PetscCall(ISDestroy(&Notme));
1057   PetscCall(PetscFree(notme));
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1062 {
1063   PetscFunctionBegin;
1064   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 /*
1084   This only works correctly for square matrices where the subblock A->A is the
1085    diagonal block
1086 */
1087 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090 
1091   PetscFunctionBegin;
1092   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1093   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1094   PetscCall(MatGetDiagonal(a->A,v));
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101 
1102   PetscFunctionBegin;
1103   PetscCall(MatScale(a->A,aa));
1104   PetscCall(MatScale(a->B,aa));
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1109 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1110 {
1111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(PetscSFDestroy(&aij->coo_sf));
1115   PetscCall(PetscFree(aij->Aperm1));
1116   PetscCall(PetscFree(aij->Bperm1));
1117   PetscCall(PetscFree(aij->Ajmap1));
1118   PetscCall(PetscFree(aij->Bjmap1));
1119 
1120   PetscCall(PetscFree(aij->Aimap2));
1121   PetscCall(PetscFree(aij->Bimap2));
1122   PetscCall(PetscFree(aij->Aperm2));
1123   PetscCall(PetscFree(aij->Bperm2));
1124   PetscCall(PetscFree(aij->Ajmap2));
1125   PetscCall(PetscFree(aij->Bjmap2));
1126 
1127   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1128   PetscCall(PetscFree(aij->Cperm1));
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1133 {
1134   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1135 
1136   PetscFunctionBegin;
1137 #if defined(PETSC_USE_LOG)
1138   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1139 #endif
1140   PetscCall(MatStashDestroy_Private(&mat->stash));
1141   PetscCall(VecDestroy(&aij->diag));
1142   PetscCall(MatDestroy(&aij->A));
1143   PetscCall(MatDestroy(&aij->B));
1144 #if defined(PETSC_USE_CTABLE)
1145   PetscCall(PetscTableDestroy(&aij->colmap));
1146 #else
1147   PetscCall(PetscFree(aij->colmap));
1148 #endif
1149   PetscCall(PetscFree(aij->garray));
1150   PetscCall(VecDestroy(&aij->lvec));
1151   PetscCall(VecScatterDestroy(&aij->Mvctx));
1152   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1153   PetscCall(PetscFree(aij->ld));
1154 
1155   /* Free COO */
1156   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1157 
1158   PetscCall(PetscFree(mat->data));
1159 
1160   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1161   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1162 
1163   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1173 #if defined(PETSC_HAVE_CUDA)
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1175 #endif
1176 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1178 #endif
1179   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1180 #if defined(PETSC_HAVE_ELEMENTAL)
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1182 #endif
1183 #if defined(PETSC_HAVE_SCALAPACK)
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1185 #endif
1186 #if defined(PETSC_HAVE_HYPRE)
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1189 #endif
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1195   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1196 #if defined(PETSC_HAVE_MKL_SPARSE)
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1198 #endif
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1202   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1203   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1208 {
1209   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1210   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1211   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1212   const PetscInt    *garray = aij->garray;
1213   const PetscScalar *aa,*ba;
1214   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1215   PetscInt          *rowlens;
1216   PetscInt          *colidxs;
1217   PetscScalar       *matvals;
1218 
1219   PetscFunctionBegin;
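  /* The viewer receives, in order: a 4-entry header (MAT_FILE_CLASSID, global rows, global
     columns, global nonzero count), the per-row nonzero counts, the column indices of all
     nonzeros in global row order, and finally the nonzero values.  Within each row the
     off-diagonal entries to the left of the diagonal block come first, then the diagonal
     block entries, then the remaining off-diagonal entries. */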
1220   PetscCall(PetscViewerSetUp(viewer));
1221 
1222   M  = mat->rmap->N;
1223   N  = mat->cmap->N;
1224   m  = mat->rmap->n;
1225   rs = mat->rmap->rstart;
1226   cs = mat->cmap->rstart;
1227   nz = A->nz + B->nz;
1228 
1229   /* write matrix header */
1230   header[0] = MAT_FILE_CLASSID;
1231   header[1] = M; header[2] = N; header[3] = nz;
1232   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1233   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1234 
1235   /* fill in and store row lengths  */
1236   PetscCall(PetscMalloc1(m,&rowlens));
1237   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1238   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1239   PetscCall(PetscFree(rowlens));
1240 
1241   /* fill in and store column indices */
1242   PetscCall(PetscMalloc1(nz,&colidxs));
1243   for (cnt=0, i=0; i<m; i++) {
1244     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1245       if (garray[B->j[jb]] > cs) break;
1246       colidxs[cnt++] = garray[B->j[jb]];
1247     }
1248     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1249       colidxs[cnt++] = A->j[ja] + cs;
1250     for (; jb<B->i[i+1]; jb++)
1251       colidxs[cnt++] = garray[B->j[jb]];
1252   }
1253   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1254   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1255   PetscCall(PetscFree(colidxs));
1256 
1257   /* fill in and store nonzero values */
1258   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1259   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1260   PetscCall(PetscMalloc1(nz,&matvals));
1261   for (cnt=0, i=0; i<m; i++) {
1262     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1263       if (garray[B->j[jb]] > cs) break;
1264       matvals[cnt++] = ba[jb];
1265     }
1266     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1267       matvals[cnt++] = aa[ja];
1268     for (; jb<B->i[i+1]; jb++)
1269       matvals[cnt++] = ba[jb];
1270   }
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1272   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1273   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1274   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1275   PetscCall(PetscFree(matvals));
1276 
1277   /* write block size option to the viewer's .info file */
1278   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #include <petscdraw.h>
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscMPIInt       rank = aij->rank,size = aij->size;
1287   PetscBool         isdraw,iascii,isbinary;
1288   PetscViewer       sviewer;
1289   PetscViewerFormat format;
1290 
1291   PetscFunctionBegin;
1292   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1293   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1294   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1295   if (iascii) {
1296     PetscCall(PetscViewerGetFormat(viewer,&format));
1297     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1298       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1299       PetscCall(PetscMalloc1(size,&nz));
1300       PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
1301       for (i=0; i<(PetscInt)size; i++) {
1302         nmax = PetscMax(nmax,nz[i]);
1303         nmin = PetscMin(nmin,nz[i]);
1304         navg += nz[i];
1305       }
1306       PetscCall(PetscFree(nz));
1307       navg = navg/size;
1308       PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
1309       PetscFunctionReturn(0);
1310     }
1311     PetscCall(PetscViewerGetFormat(viewer,&format));
1312     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1313       MatInfo   info;
1314       PetscInt *inodes=NULL;
1315 
1316       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
1317       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
1318       PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
1319       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1320       if (!inodes) {
1321         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1322                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1323       } else {
1324         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1325                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1326       }
1327       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
1328       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1329       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
1330       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1331       PetscCall(PetscViewerFlush(viewer));
1332       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1333       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
1334       PetscCall(VecScatterView(aij->Mvctx,viewer));
1335       PetscFunctionReturn(0);
1336     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1337       PetscInt inodecount,inodelimit,*inodes;
1338       PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
1339       if (inodes) {
1340         PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1341       } else {
1342         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1343       }
1344       PetscFunctionReturn(0);
1345     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1346       PetscFunctionReturn(0);
1347     }
1348   } else if (isbinary) {
1349     if (size == 1) {
1350       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1351       PetscCall(MatView(aij->A,viewer));
1352     } else {
1353       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
1354     }
1355     PetscFunctionReturn(0);
1356   } else if (iascii && size == 1) {
1357     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1358     PetscCall(MatView(aij->A,viewer));
1359     PetscFunctionReturn(0);
1360   } else if (isdraw) {
1361     PetscDraw draw;
1362     PetscBool isnull;
1363     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1364     PetscCall(PetscDrawIsNull(draw,&isnull));
1365     if (isnull) PetscFunctionReturn(0);
1366   }
1367 
1368   { /* assemble the entire matrix onto first processor */
1369     Mat A = NULL, Av;
1370     IS  isrow,iscol;
1371 
1372     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1373     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1374     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
1375     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
1376 /*  An alternative approach, commented out below, uses MatCreateSubMatrices() instead */
1377 /*
1378     Mat *AA, A = NULL, Av;
1379     IS  isrow,iscol;
1380 
1381     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1382     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1383     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1384     if (rank == 0) {
1385        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1386        A    = AA[0];
1387        Av   = AA[0];
1388     }
1389     PetscCall(MatDestroySubMatrices(1,&AA));
1390 */
1391     PetscCall(ISDestroy(&iscol));
1392     PetscCall(ISDestroy(&isrow));
1393     /*
1394        Every process has to make this call to draw the matrix since the graphics waits are
1395        synchronized across all processes that share the PetscDraw object
1396     */
1397     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1398     if (rank == 0) {
1399       if (((PetscObject)mat)->name) {
1400         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
1401       }
1402       PetscCall(MatView_SeqAIJ(Av,sviewer));
1403     }
1404     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1405     PetscCall(PetscViewerFlush(viewer));
1406     PetscCall(MatDestroy(&A));
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1412 {
1413   PetscBool      iascii,isdraw,issocket,isbinary;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1419   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1420   if (iascii || isdraw || isbinary || issocket) {
1421     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1427 {
1428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1429   Vec            bb1 = NULL;
1430   PetscBool      hasop;
1431 
1432   PetscFunctionBegin;
1433   if (flag == SOR_APPLY_UPPER) {
1434     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1435     PetscFunctionReturn(0);
1436   }
1437 
1438   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1439     PetscCall(VecDuplicate(bb,&bb1));
1440   }
1441 
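  /* The "local" sweeps below apply SOR only to the diagonal block mat->A; the coupling through the
     off-diagonal block mat->B is folded into the right-hand side each iteration as bb1 = bb - B*x,
     using ghost values of x gathered into mat->lvec. */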
1442   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1443     if (flag & SOR_ZERO_INITIAL_GUESS) {
1444       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1445       its--;
1446     }
1447 
1448     while (its--) {
1449       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1450       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1451 
1452       /* update rhs: bb1 = bb - B*x */
1453       PetscCall(VecScale(mat->lvec,-1.0));
1454       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1455 
1456       /* local sweep */
1457       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
1458     }
1459   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1460     if (flag & SOR_ZERO_INITIAL_GUESS) {
1461       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1462       its--;
1463     }
1464     while (its--) {
1465       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1466       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       PetscCall(VecScale(mat->lvec,-1.0));
1470       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1471 
1472       /* local sweep */
1473       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
1474     }
1475   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1476     if (flag & SOR_ZERO_INITIAL_GUESS) {
1477       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1478       its--;
1479     }
1480     while (its--) {
1481       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1482       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       PetscCall(VecScale(mat->lvec,-1.0));
1486       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1487 
1488       /* local sweep */
1489       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
1490     }
1491   } else if (flag & SOR_EISENSTAT) {
1492     Vec xx1;
1493 
1494     PetscCall(VecDuplicate(bb,&xx1));
1495     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1496 
1497     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1498     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1499     if (!mat->diag) {
1500       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
1501       PetscCall(MatGetDiagonal(matin,mat->diag));
1502     }
1503     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1504     if (hasop) {
1505       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1506     } else {
1507       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1508     }
1509     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1510 
1511     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1512 
1513     /* local sweep */
1514     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
1515     PetscCall(VecAXPY(xx,1.0,xx1));
1516     PetscCall(VecDestroy(&xx1));
1517   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1518 
1519   PetscCall(VecDestroy(&bb1));
1520 
1521   matin->factorerrortype = mat->A->factorerrortype;
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1526 {
1527   Mat            aA,aB,Aperm;
1528   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1529   PetscScalar    *aa,*ba;
1530   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1531   PetscSF        rowsf,sf;
1532   IS             parcolp = NULL;
1533   PetscBool      done;
1534 
1535   PetscFunctionBegin;
1536   PetscCall(MatGetLocalSize(A,&m,&n));
1537   PetscCall(ISGetIndices(rowp,&rwant));
1538   PetscCall(ISGetIndices(colp,&cwant));
1539   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
1540 
1541   /* Invert row permutation to find out where my rows should go */
1542   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
1543   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
1544   PetscCall(PetscSFSetFromOptions(rowsf));
1545   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1546   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1547   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1548 
1549   /* Invert column permutation to find out where my columns should go */
1550   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1551   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
1552   PetscCall(PetscSFSetFromOptions(sf));
1553   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1554   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1555   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1556   PetscCall(PetscSFDestroy(&sf));
1557 
1558   PetscCall(ISRestoreIndices(rowp,&rwant));
1559   PetscCall(ISRestoreIndices(colp,&cwant));
1560   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
1561 
1562   /* Find out where my gcols should go */
1563   PetscCall(MatGetSize(aB,NULL,&ng));
1564   PetscCall(PetscMalloc1(ng,&gcdest));
1565   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1566   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
1567   PetscCall(PetscSFSetFromOptions(sf));
1568   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&sf));
1571 
1572   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
1573   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1574   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1575   for (i=0; i<m; i++) {
1576     PetscInt    row = rdest[i];
1577     PetscMPIInt rowner;
1578     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
1579     for (j=ai[i]; j<ai[i+1]; j++) {
1580       PetscInt    col = cdest[aj[j]];
1581       PetscMPIInt cowner;
1582       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
1583       if (rowner == cowner) dnnz[i]++;
1584       else onnz[i]++;
1585     }
1586     for (j=bi[i]; j<bi[i+1]; j++) {
1587       PetscInt    col = gcdest[bj[j]];
1588       PetscMPIInt cowner;
1589       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
1590       if (rowner == cowner) dnnz[i]++;
1591       else onnz[i]++;
1592     }
1593   }
1594   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1595   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1596   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1597   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1598   PetscCall(PetscSFDestroy(&rowsf));
1599 
1600   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
1601   PetscCall(MatSeqAIJGetArray(aA,&aa));
1602   PetscCall(MatSeqAIJGetArray(aB,&ba));
1603   for (i=0; i<m; i++) {
1604     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1605     PetscInt j0,rowlen;
1606     rowlen = ai[i+1] - ai[i];
1607     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches of at most m */
1608       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1609       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1610     }
1611     rowlen = bi[i+1] - bi[i];
1612     for (j0=j=0; j<rowlen; j0=j) {
1613       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1614       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1615     }
1616   }
1617   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
1618   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
1619   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1620   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1621   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
1622   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
1623   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
1624   PetscCall(PetscFree3(work,rdest,cdest));
1625   PetscCall(PetscFree(gcdest));
1626   if (parcolp) PetscCall(ISDestroy(&colp));
1627   *B = Aperm;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1632 {
1633   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1634 
1635   PetscFunctionBegin;
1636   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1637   if (ghosts) *ghosts = aij->garray;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1642 {
1643   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1644   Mat            A    = mat->A,B = mat->B;
1645   PetscLogDouble isend[5],irecv[5];
1646 
1647   PetscFunctionBegin;
1648   info->block_size = 1.0;
1649   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1650 
1651   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1652   isend[3] = info->memory;  isend[4] = info->mallocs;
1653 
1654   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1655 
1656   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1657   isend[3] += info->memory;  isend[4] += info->mallocs;
1658   if (flag == MAT_LOCAL) {
1659     info->nz_used      = isend[0];
1660     info->nz_allocated = isend[1];
1661     info->nz_unneeded  = isend[2];
1662     info->memory       = isend[3];
1663     info->mallocs      = isend[4];
1664   } else if (flag == MAT_GLOBAL_MAX) {
1665     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   } else if (flag == MAT_GLOBAL_SUM) {
1673     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1674 
1675     info->nz_used      = irecv[0];
1676     info->nz_allocated = irecv[1];
1677     info->nz_unneeded  = irecv[2];
1678     info->memory       = irecv[3];
1679     info->mallocs      = irecv[4];
1680   }
1681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1682   info->fill_ratio_needed = 0;
1683   info->factor_mallocs    = 0;
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1688 {
1689   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1690 
1691   PetscFunctionBegin;
1692   switch (op) {
1693   case MAT_NEW_NONZERO_LOCATIONS:
1694   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1695   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1696   case MAT_KEEP_NONZERO_PATTERN:
1697   case MAT_NEW_NONZERO_LOCATION_ERR:
1698   case MAT_USE_INODES:
1699   case MAT_IGNORE_ZERO_ENTRIES:
1700   case MAT_FORM_EXPLICIT_TRANSPOSE:
1701     MatCheckPreallocated(A,1);
1702     PetscCall(MatSetOption(a->A,op,flg));
1703     PetscCall(MatSetOption(a->B,op,flg));
1704     break;
1705   case MAT_ROW_ORIENTED:
1706     MatCheckPreallocated(A,1);
1707     a->roworiented = flg;
1708 
1709     PetscCall(MatSetOption(a->A,op,flg));
1710     PetscCall(MatSetOption(a->B,op,flg));
1711     break;
1712   case MAT_FORCE_DIAGONAL_ENTRIES:
1713   case MAT_SORTED_FULL:
1714     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1715     break;
1716   case MAT_IGNORE_OFF_PROC_ENTRIES:
1717     a->donotstash = flg;
1718     break;
1719   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1720   case MAT_SPD:
1721   case MAT_SYMMETRIC:
1722   case MAT_STRUCTURALLY_SYMMETRIC:
1723   case MAT_HERMITIAN:
1724   case MAT_SYMMETRY_ETERNAL:
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   case MAT_STRUCTURE_ONLY:
1730     /* The option is handled directly by MatSetOption() */
1731     break;
1732   default:
1733     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1739 {
1740   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1741   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1742   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1743   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1744   PetscInt       *cmap,*idx_p;
1745 
1746   PetscFunctionBegin;
1747   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1748   mat->getrowactive = PETSC_TRUE;
1749 
1750   if (!mat->rowvalues && (idx || v)) {
1751     /*
1752         allocate enough space to hold information from the longest row.
1753     */
1754     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1755     PetscInt   max = 1,tmp;
1756     for (i=0; i<matin->rmap->n; i++) {
1757       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1758       if (max < tmp) max = tmp;
1759     }
1760     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
1761   }
1762 
1763   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1764   lrow = row - rstart;
1765 
1766   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1767   if (!v)   {pvA = NULL; pvB = NULL;}
1768   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1769   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1770   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1771   nztot = nzA + nzB;
1772 
1773   cmap = mat->garray;
1774   if (v  || idx) {
1775     if (nztot) {
1776       /* Sort by increasing column numbers, assuming A and B already sorted */
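      /* imark counts how many off-diagonal (B) entries have global column < cstart; those come first,
         followed by all diagonal-block (A) entries, followed by the remaining B entries. */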
1777       PetscInt imark = -1;
1778       if (v) {
1779         *v = v_p = mat->rowvalues;
1780         for (i=0; i<nzB; i++) {
1781           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1782           else break;
1783         }
1784         imark = i;
1785         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1786         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1787       }
1788       if (idx) {
1789         *idx = idx_p = mat->rowindices;
1790         if (imark > -1) {
1791           for (i=0; i<imark; i++) {
1792             idx_p[i] = cmap[cworkB[i]];
1793           }
1794         } else {
1795           for (i=0; i<nzB; i++) {
1796             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1797             else break;
1798           }
1799           imark = i;
1800         }
1801         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1802         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1803       }
1804     } else {
1805       if (idx) *idx = NULL;
1806       if (v)   *v   = NULL;
1807     }
1808   }
1809   *nz  = nztot;
1810   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1811   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1816 {
1817   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1818 
1819   PetscFunctionBegin;
1820   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1821   aij->getrowactive = PETSC_FALSE;
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1826 {
1827   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1828   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1829   PetscInt        i,j,cstart = mat->cmap->rstart;
1830   PetscReal       sum = 0.0;
1831   const MatScalar *v,*amata,*bmata;
1832 
1833   PetscFunctionBegin;
1834   if (aij->size == 1) {
1835     PetscCall(MatNorm(aij->A,type,norm));
1836   } else {
1837     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
1838     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
1839     if (type == NORM_FROBENIUS) {
1840       v = amata;
1841       for (i=0; i<amat->nz; i++) {
1842         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1843       }
1844       v = bmata;
1845       for (i=0; i<bmat->nz; i++) {
1846         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1847       }
1848       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1849       *norm = PetscSqrtReal(*norm);
1850       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
1851     } else if (type == NORM_1) { /* max column norm */
1852       PetscReal *tmp,*tmp2;
1853       PetscInt  *jj,*garray = aij->garray;
1854       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
1855       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
1856       *norm = 0.0;
1857       v     = amata; jj = amat->j;
1858       for (j=0; j<amat->nz; j++) {
1859         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1860       }
1861       v = bmata; jj = bmat->j;
1862       for (j=0; j<bmat->nz; j++) {
1863         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1864       }
1865       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1866       for (j=0; j<mat->cmap->N; j++) {
1867         if (tmp2[j] > *norm) *norm = tmp2[j];
1868       }
1869       PetscCall(PetscFree(tmp));
1870       PetscCall(PetscFree(tmp2));
1871       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1872     } else if (type == NORM_INFINITY) { /* max row norm */
1873       PetscReal ntemp = 0.0;
1874       for (j=0; j<aij->A->rmap->n; j++) {
1875         v   = amata + amat->i[j];
1876         sum = 0.0;
1877         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1878           sum += PetscAbsScalar(*v); v++;
1879         }
1880         v = bmata + bmat->i[j];
1881         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1882           sum += PetscAbsScalar(*v); v++;
1883         }
1884         if (sum > ntemp) ntemp = sum;
1885       }
1886       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
1887       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1888     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1889     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
1890     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
1891   }
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1896 {
1897   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1898   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1899   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1900   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1901   Mat             B,A_diag,*B_diag;
1902   const MatScalar *pbv,*bv;
1903 
1904   PetscFunctionBegin;
1905   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1906   ai = Aloc->i; aj = Aloc->j;
1907   bi = Bloc->i; bj = Bloc->j;
1908   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1909     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1910     PetscSFNode          *oloc;
1911     PETSC_UNUSED PetscSF sf;
1912 
1913     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
1914     /* compute d_nnz for preallocation */
1915     PetscCall(PetscArrayzero(d_nnz,na));
1916     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1917     /* compute local off-diagonal contributions */
1918     PetscCall(PetscArrayzero(g_nnz,nb));
1919     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1920     /* accumulate those counts onto the processes that own the corresponding global columns */
1921     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1922     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
1923     PetscCall(PetscSFSetFromOptions(sf));
1924     PetscCall(PetscArrayzero(o_nnz,na));
1925     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1926     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1927     PetscCall(PetscSFDestroy(&sf));
1928 
1929     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1930     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1931     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1932     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1933     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
1934     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1935   } else {
1936     B    = *matout;
1937     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1938   }
1939 
1940   b           = (Mat_MPIAIJ*)B->data;
1941   A_diag      = a->A;
1942   B_diag      = &b->A;
1943   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1944   A_diag_ncol = A_diag->cmap->N;
1945   B_diag_ilen = sub_B_diag->ilen;
1946   B_diag_i    = sub_B_diag->i;
1947 
1948   /* Set ilen for diagonal of B */
1949   for (i=0; i<A_diag_ncol; i++) {
1950     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1951   }
1952 
1953   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1954   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1955   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1956 
1957   /* copy over the B part */
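  /* Each local row i of B becomes (part of) column A->rmap->rstart + i of the transpose; the target
     rows are the global column indices a->garray[], which are generally owned by other processes,
     so MatSetValues() is used and the communication is handled by the assembly below. */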
1958   PetscCall(PetscMalloc1(bi[mb],&cols));
1959   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1960   pbv  = bv;
1961   row  = A->rmap->rstart;
1962   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1963   cols_tmp = cols;
1964   for (i=0; i<mb; i++) {
1965     ncol = bi[i+1]-bi[i];
1966     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
1967     row++;
1968     pbv += ncol; cols_tmp += ncol;
1969   }
1970   PetscCall(PetscFree(cols));
1971   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1972 
1973   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1974   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1975   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1976     *matout = B;
1977   } else {
1978     PetscCall(MatHeaderMerge(A,&B));
1979   }
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1984 {
1985   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1986   Mat            a    = aij->A,b = aij->B;
1987   PetscInt       s1,s2,s3;
1988 
1989   PetscFunctionBegin;
1990   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1991   if (rr) {
1992     PetscCall(VecGetLocalSize(rr,&s1));
1993     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1994     /* Overlap communication with computation. */
1995     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1996   }
1997   if (ll) {
1998     PetscCall(VecGetLocalSize(ll,&s1));
1999     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2000     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2001   }
2002   /* scale the diagonal block */
2003   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2004 
2005   if (rr) {
2006     /* Do a scatter end and then right scale the off-diagonal block */
2007     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2008     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2014 {
2015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2016 
2017   PetscFunctionBegin;
2018   PetscCall(MatSetUnfactored(a->A));
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2023 {
2024   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2025   Mat            a,b,c,d;
2026   PetscBool      flg;
2027 
2028   PetscFunctionBegin;
2029   a = matA->A; b = matA->B;
2030   c = matB->A; d = matB->B;
2031 
2032   PetscCall(MatEqual(a,c,&flg));
2033   if (flg) {
2034     PetscCall(MatEqual(b,d,&flg));
2035   }
2036   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2041 {
2042   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2043   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2044 
2045   PetscFunctionBegin;
2046   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2047   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2048     /* Because of the column compression in the off-process part of the matrix a->B,
2049        the number of columns in a->B and b->B may differ, hence we cannot call
2050        MatCopy() directly on the two parts. If need be, a copy more efficient than
2051        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2052        and then copying the submatrices */
2053     PetscCall(MatCopy_Basic(A,B,str));
2054   } else {
2055     PetscCall(MatCopy(a->A,b->A,str));
2056     PetscCall(MatCopy(a->B,b->B,str));
2057   }
2058   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2063 {
2064   PetscFunctionBegin;
2065   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /*
2070    Computes the number of nonzeros per row needed for preallocation when X and Y
2071    have different nonzero structure.
2072 */
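/*
   A small worked example (illustrative): if row i of X has global columns {1,5,9} and row i of Y has
   global columns {5,7}, the merged pattern is {1,5,7,9}, so nnz[i] = 4; the inner loop below advances
   through Y's columns and counts a column shared by X and Y (here, 5) only once.
*/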
2073 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2074 {
2075   PetscInt       i,j,k,nzx,nzy;
2076 
2077   PetscFunctionBegin;
2078   /* Set the number of nonzeros in the new matrix */
2079   for (i=0; i<m; i++) {
2080     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2081     nzx = xi[i+1] - xi[i];
2082     nzy = yi[i+1] - yi[i];
2083     nnz[i] = 0;
2084     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2085       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2086       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2087       nnz[i]++;
2088     }
2089     for (; k<nzy; k++) nnz[i]++;
2090   }
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2095 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2096 {
2097   PetscInt       m = Y->rmap->N;
2098   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2099   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2100 
2101   PetscFunctionBegin;
2102   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2107 {
2108   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2109 
2110   PetscFunctionBegin;
2111   if (str == SAME_NONZERO_PATTERN) {
2112     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2113     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2114   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2115     PetscCall(MatAXPY_Basic(Y,a,X,str));
2116   } else {
2117     Mat      B;
2118     PetscInt *nnz_d,*nnz_o;
2119 
2120     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2121     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2122     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2123     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2124     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2125     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2126     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2127     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2128     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2129     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2130     PetscCall(MatHeaderMerge(Y,&B));
2131     PetscCall(PetscFree(nnz_d));
2132     PetscCall(PetscFree(nnz_o));
2133   }
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2138 
2139 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2140 {
2141   PetscFunctionBegin;
2142   if (PetscDefined(USE_COMPLEX)) {
2143     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2144 
2145     PetscCall(MatConjugate_SeqAIJ(aij->A));
2146     PetscCall(MatConjugate_SeqAIJ(aij->B));
2147   }
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2152 {
2153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(MatRealPart(a->A));
2157   PetscCall(MatRealPart(a->B));
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2162 {
2163   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2164 
2165   PetscFunctionBegin;
2166   PetscCall(MatImaginaryPart(a->A));
2167   PetscCall(MatImaginaryPart(a->B));
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2172 {
2173   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2174   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2175   PetscScalar       *va,*vv;
2176   Vec               vB,vA;
2177   const PetscScalar *vb;
2178 
2179   PetscFunctionBegin;
2180   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2181   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2182 
2183   PetscCall(VecGetArrayWrite(vA,&va));
2184   if (idx) {
2185     for (i=0; i<m; i++) {
2186       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2187     }
2188   }
2189 
2190   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2191   PetscCall(PetscMalloc1(m,&idxb));
2192   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2193 
2194   PetscCall(VecGetArrayWrite(v,&vv));
2195   PetscCall(VecGetArrayRead(vB,&vb));
2196   for (i=0; i<m; i++) {
2197     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2198       vv[i] = vb[i];
2199       if (idx) idx[i] = a->garray[idxb[i]];
2200     } else {
2201       vv[i] = va[i];
2202       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2203         idx[i] = a->garray[idxb[i]];
2204     }
2205   }
2206   PetscCall(VecRestoreArrayWrite(v,&vv));
2207   PetscCall(VecRestoreArrayWrite(vA,&va));
2208   PetscCall(VecRestoreArrayRead(vB,&vb));
2209   PetscCall(PetscFree(idxb));
2210   PetscCall(VecDestroy(&vA));
2211   PetscCall(VecDestroy(&vB));
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2216 {
2217   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2218   PetscInt          m = A->rmap->n,n = A->cmap->n;
2219   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2220   PetscInt          *cmap  = mat->garray;
2221   PetscInt          *diagIdx, *offdiagIdx;
2222   Vec               diagV, offdiagV;
2223   PetscScalar       *a, *diagA, *offdiagA;
2224   const PetscScalar *ba,*bav;
2225   PetscInt          r,j,col,ncols,*bi,*bj;
2226   Mat               B = mat->B;
2227   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2228 
2229   PetscFunctionBegin;
2230   /* When one process holds the entire A and the other processes have no entries */
2231   if (A->cmap->N == n) {
2232     PetscCall(VecGetArrayWrite(v,&diagA));
2233     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2234     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
2235     PetscCall(VecDestroy(&diagV));
2236     PetscCall(VecRestoreArrayWrite(v,&diagA));
2237     PetscFunctionReturn(0);
2238   } else if (n == 0) {
2239     if (m) {
2240       PetscCall(VecGetArrayWrite(v,&a));
2241       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2242       PetscCall(VecRestoreArrayWrite(v,&a));
2243     }
2244     PetscFunctionReturn(0);
2245   }
2246 
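  /* Compute the row-wise minima of the diagonal block and of the off-diagonal block separately
     (the latter accounting for the implicit zeros not stored in the compressed B), then merge
     the two results row by row below. */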
2247   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2249   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2250   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2251 
2252   /* Get offdiagIdx[] for implicit 0.0 */
2253   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2254   ba   = bav;
2255   bi   = b->i;
2256   bj   = b->j;
2257   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2258   for (r = 0; r < m; r++) {
2259     ncols = bi[r+1] - bi[r];
2260     if (ncols == A->cmap->N - n) { /* Brow is dense */
2261       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2262     } else { /* Brow is sparse, so we already KNOW the row's minimum absolute value is 0.0 (an implicit zero) */
2263       offdiagA[r] = 0.0;
2264 
2265       /* Find first hole in the cmap */
2266       for (j=0; j<ncols; j++) {
2267         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2268         if (col > j && j < cstart) {
2269           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2270           break;
2271         } else if (col > j + n && j >= cstart) {
2272           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2273           break;
2274         }
2275       }
2276       if (j == ncols && ncols < A->cmap->N - n) {
2277         /* a hole is outside compressed Bcols */
2278         if (ncols == 0) {
2279           if (cstart) {
2280             offdiagIdx[r] = 0;
2281           } else offdiagIdx[r] = cend;
2282         } else { /* ncols > 0 */
2283           offdiagIdx[r] = cmap[ncols-1] + 1;
2284           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2285         }
2286       }
2287     }
2288 
2289     for (j=0; j<ncols; j++) {
2290       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2291       ba++; bj++;
2292     }
2293   }
2294 
2295   PetscCall(VecGetArrayWrite(v, &a));
2296   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2297   for (r = 0; r < m; ++r) {
2298     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2299       a[r]   = diagA[r];
2300       if (idx) idx[r] = cstart + diagIdx[r];
2301     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2302       a[r] = diagA[r];
2303       if (idx) {
2304         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2305           idx[r] = cstart + diagIdx[r];
2306         } else idx[r] = offdiagIdx[r];
2307       }
2308     } else {
2309       a[r]   = offdiagA[r];
2310       if (idx) idx[r] = offdiagIdx[r];
2311     }
2312   }
2313   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2314   PetscCall(VecRestoreArrayWrite(v, &a));
2315   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2316   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2317   PetscCall(VecDestroy(&diagV));
2318   PetscCall(VecDestroy(&offdiagV));
2319   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2324 {
2325   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2326   PetscInt          m = A->rmap->n,n = A->cmap->n;
2327   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2328   PetscInt          *cmap  = mat->garray;
2329   PetscInt          *diagIdx, *offdiagIdx;
2330   Vec               diagV, offdiagV;
2331   PetscScalar       *a, *diagA, *offdiagA;
2332   const PetscScalar *ba,*bav;
2333   PetscInt          r,j,col,ncols,*bi,*bj;
2334   Mat               B = mat->B;
2335   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2336 
2337   PetscFunctionBegin;
2338   /* When one process holds the entire A and the other processes have no entries */
2339   if (A->cmap->N == n) {
2340     PetscCall(VecGetArrayWrite(v,&diagA));
2341     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2342     PetscCall(MatGetRowMin(mat->A,diagV,idx));
2343     PetscCall(VecDestroy(&diagV));
2344     PetscCall(VecRestoreArrayWrite(v,&diagA));
2345     PetscFunctionReturn(0);
2346   } else if (n == 0) {
2347     if (m) {
2348       PetscCall(VecGetArrayWrite(v,&a));
2349       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2350       PetscCall(VecRestoreArrayWrite(v,&a));
2351     }
2352     PetscFunctionReturn(0);
2353   }
2354 
2355   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
2356   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2357   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2358   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2359 
2360   /* Get offdiagIdx[] for implicit 0.0 */
2361   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2362   ba   = bav;
2363   bi   = b->i;
2364   bj   = b->j;
2365   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2366   for (r = 0; r < m; r++) {
2367     ncols = bi[r+1] - bi[r];
2368     if (ncols == A->cmap->N - n) { /* Brow is dense */
2369       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2370     } else { /* Brow is sparse, so we already KNOW the row's minimum is 0.0 or lower (there is an implicit zero) */
2371       offdiagA[r] = 0.0;
2372 
2373       /* Find first hole in the cmap */
2374       for (j=0; j<ncols; j++) {
2375         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2376         if (col > j && j < cstart) {
2377           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2378           break;
2379         } else if (col > j + n && j >= cstart) {
2380           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2381           break;
2382         }
2383       }
2384       if (j == ncols && ncols < A->cmap->N - n) {
2385         /* a hole is outside compressed Bcols */
2386         if (ncols == 0) {
2387           if (cstart) {
2388             offdiagIdx[r] = 0;
2389           } else offdiagIdx[r] = cend;
2390         } else { /* ncols > 0 */
2391           offdiagIdx[r] = cmap[ncols-1] + 1;
2392           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2393         }
2394       }
2395     }
2396 
2397     for (j=0; j<ncols; j++) {
2398       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2399       ba++; bj++;
2400     }
2401   }
2402 
2403   PetscCall(VecGetArrayWrite(v, &a));
2404   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2405   for (r = 0; r < m; ++r) {
2406     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2407       a[r]   = diagA[r];
2408       if (idx) idx[r] = cstart + diagIdx[r];
2409     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2410       a[r] = diagA[r];
2411       if (idx) {
2412         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2413           idx[r] = cstart + diagIdx[r];
2414         } else idx[r] = offdiagIdx[r];
2415       }
2416     } else {
2417       a[r]   = offdiagA[r];
2418       if (idx) idx[r] = offdiagIdx[r];
2419     }
2420   }
2421   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2422   PetscCall(VecRestoreArrayWrite(v, &a));
2423   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2424   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2425   PetscCall(VecDestroy(&diagV));
2426   PetscCall(VecDestroy(&offdiagV));
2427   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2432 {
2433   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2434   PetscInt          m = A->rmap->n,n = A->cmap->n;
2435   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2436   PetscInt          *cmap  = mat->garray;
2437   PetscInt          *diagIdx, *offdiagIdx;
2438   Vec               diagV, offdiagV;
2439   PetscScalar       *a, *diagA, *offdiagA;
2440   const PetscScalar *ba,*bav;
2441   PetscInt          r,j,col,ncols,*bi,*bj;
2442   Mat               B = mat->B;
2443   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2444 
2445   PetscFunctionBegin;
2446   /* When one process holds the entire A and the other processes have no entries */
2447   if (A->cmap->N == n) {
2448     PetscCall(VecGetArrayWrite(v,&diagA));
2449     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2450     PetscCall(MatGetRowMax(mat->A,diagV,idx));
2451     PetscCall(VecDestroy(&diagV));
2452     PetscCall(VecRestoreArrayWrite(v,&diagA));
2453     PetscFunctionReturn(0);
2454   } else if (n == 0) {
2455     if (m) {
2456       PetscCall(VecGetArrayWrite(v,&a));
2457       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2458       PetscCall(VecRestoreArrayWrite(v,&a));
2459     }
2460     PetscFunctionReturn(0);
2461   }
2462 
2463   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2464   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2465   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2466   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2467 
2468   /* Get offdiagIdx[] for implicit 0.0 */
2469   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2470   ba   = bav;
2471   bi   = b->i;
2472   bj   = b->j;
2473   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2474   for (r = 0; r < m; r++) {
2475     ncols = bi[r+1] - bi[r];
2476     if (ncols == A->cmap->N - n) { /* Brow is dense */
2477       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2478     } else { /* Brow is sparse, so we already KNOW the row's maximum is 0.0 or higher (there is an implicit zero) */
2479       offdiagA[r] = 0.0;
2480 
2481       /* Find first hole in the cmap */
2482       for (j=0; j<ncols; j++) {
2483         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2484         if (col > j && j < cstart) {
2485           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2486           break;
2487         } else if (col > j + n && j >= cstart) {
2488           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2489           break;
2490         }
2491       }
2492       if (j == ncols && ncols < A->cmap->N - n) {
2493         /* a hole is outside compressed Bcols */
2494         if (ncols == 0) {
2495           if (cstart) {
2496             offdiagIdx[r] = 0;
2497           } else offdiagIdx[r] = cend;
2498         } else { /* ncols > 0 */
2499           offdiagIdx[r] = cmap[ncols-1] + 1;
2500           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2501         }
2502       }
2503     }
2504 
2505     for (j=0; j<ncols; j++) {
2506       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2507       ba++; bj++;
2508     }
2509   }
2510 
2511   PetscCall(VecGetArrayWrite(v,    &a));
2512   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
2513   for (r = 0; r < m; ++r) {
2514     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2515       a[r] = diagA[r];
2516       if (idx) idx[r] = cstart + diagIdx[r];
2517     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2518       a[r] = diagA[r];
2519       if (idx) {
2520         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2521           idx[r] = cstart + diagIdx[r];
2522         } else idx[r] = offdiagIdx[r];
2523       }
2524     } else {
2525       a[r] = offdiagA[r];
2526       if (idx) idx[r] = offdiagIdx[r];
2527     }
2528   }
2529   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2530   PetscCall(VecRestoreArrayWrite(v,       &a));
2531   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
2532   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
2533   PetscCall(VecDestroy(&diagV));
2534   PetscCall(VecDestroy(&offdiagV));
2535   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2540 {
2541   Mat            *dummy;
2542 
2543   PetscFunctionBegin;
2544   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2545   *newmat = *dummy;
2546   PetscCall(PetscFree(dummy));
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2551 {
2552   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2553 
2554   PetscFunctionBegin;
2555   PetscCall(MatInvertBlockDiagonal(a->A,values));
2556   A->factorerrortype = a->A->factorerrortype;
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2561 {
2562   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2563 
2564   PetscFunctionBegin;
2565   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2566   PetscCall(MatSetRandom(aij->A,rctx));
2567   if (x->assembled) {
2568     PetscCall(MatSetRandom(aij->B,rctx));
2569   } else {
2570     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2571   }
2572   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2573   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2578 {
2579   PetscFunctionBegin;
2580   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2581   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 /*@
2586    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2587 
2588    Not collective
2589 
2590    Input Parameter:
2591 .    A - the matrix
2592 
2593    Output Parameter:
2594 .    nz - the number of nonzeros
2595 
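   Example Usage (an illustrative sketch; assumes A is an assembled matrix of type MATMPIAIJ):
.vb
     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A,&nz));
.ve
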
2596  Level: advanced
2597 
2598 @*/
2599 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz)
2600 {
2601   Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data;
2602   Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data;
2603 
2604   PetscFunctionBegin;
2605   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 /*@
2610    MatMPIAIJSetUseScalableIncreaseOverlap - Determine whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2611 
2612    Collective on Mat
2613 
2614    Input Parameters:
2615 +    A - the matrix
2616 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2617 
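   Example Usage (an illustrative sketch; assumes A is a MATMPIAIJ matrix on which MatIncreaseOverlap() will later be called):
.vb
     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE));
.ve

   The same behavior can be selected from the options database with -mat_increase_overlap_scalable
   (handled in MatSetFromOptions_MPIAIJ() below).
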
2618  Level: advanced
2619 
2620 @*/
2621 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2622 {
2623   PetscFunctionBegin;
2624   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2629 {
2630   PetscBool            sc = PETSC_FALSE,flg;
2631 
2632   PetscFunctionBegin;
2633   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2634   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2635   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2636   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2637   PetscOptionsHeadEnd();
2638   PetscFunctionReturn(0);
2639 }
2640 
2641 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2642 {
2643   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2644   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2645 
2646   PetscFunctionBegin;
2647   if (!Y->preallocated) {
2648     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2649   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A; we just need an estimate for the preallocation. */
2650     PetscInt nonew = aij->nonew;
2651     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2652     aij->nonew = nonew;
2653   }
2654   PetscCall(MatShift_Basic(Y,a));
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2659 {
2660   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2661 
2662   PetscFunctionBegin;
2663   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2664   PetscCall(MatMissingDiagonal(a->A,missing,d));
2665   if (d) {
2666     PetscInt rstart;
2667     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2668     *d += rstart;
2670   }
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2675 {
2676   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2677 
2678   PetscFunctionBegin;
2679   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2680   PetscFunctionReturn(0);
2681 }
2682 
2683 /* -------------------------------------------------------------------*/
2684 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2685                                        MatGetRow_MPIAIJ,
2686                                        MatRestoreRow_MPIAIJ,
2687                                        MatMult_MPIAIJ,
2688                                 /* 4*/ MatMultAdd_MPIAIJ,
2689                                        MatMultTranspose_MPIAIJ,
2690                                        MatMultTransposeAdd_MPIAIJ,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                 /*10*/ NULL,
2695                                        NULL,
2696                                        NULL,
2697                                        MatSOR_MPIAIJ,
2698                                        MatTranspose_MPIAIJ,
2699                                 /*15*/ MatGetInfo_MPIAIJ,
2700                                        MatEqual_MPIAIJ,
2701                                        MatGetDiagonal_MPIAIJ,
2702                                        MatDiagonalScale_MPIAIJ,
2703                                        MatNorm_MPIAIJ,
2704                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2705                                        MatAssemblyEnd_MPIAIJ,
2706                                        MatSetOption_MPIAIJ,
2707                                        MatZeroEntries_MPIAIJ,
2708                                 /*24*/ MatZeroRows_MPIAIJ,
2709                                        NULL,
2710                                        NULL,
2711                                        NULL,
2712                                        NULL,
2713                                 /*29*/ MatSetUp_MPIAIJ,
2714                                        NULL,
2715                                        NULL,
2716                                        MatGetDiagonalBlock_MPIAIJ,
2717                                        NULL,
2718                                 /*34*/ MatDuplicate_MPIAIJ,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                 /*39*/ MatAXPY_MPIAIJ,
2724                                        MatCreateSubMatrices_MPIAIJ,
2725                                        MatIncreaseOverlap_MPIAIJ,
2726                                        MatGetValues_MPIAIJ,
2727                                        MatCopy_MPIAIJ,
2728                                 /*44*/ MatGetRowMax_MPIAIJ,
2729                                        MatScale_MPIAIJ,
2730                                        MatShift_MPIAIJ,
2731                                        MatDiagonalSet_MPIAIJ,
2732                                        MatZeroRowsColumns_MPIAIJ,
2733                                 /*49*/ MatSetRandom_MPIAIJ,
2734                                        MatGetRowIJ_MPIAIJ,
2735                                        MatRestoreRowIJ_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2739                                        NULL,
2740                                        MatSetUnfactored_MPIAIJ,
2741                                        MatPermute_MPIAIJ,
2742                                        NULL,
2743                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2744                                        MatDestroy_MPIAIJ,
2745                                        MatView_MPIAIJ,
2746                                        NULL,
2747                                        NULL,
2748                                 /*64*/ NULL,
2749                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2754                                        MatGetRowMinAbs_MPIAIJ,
2755                                        NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                 /*75*/ MatFDColoringApply_AIJ,
2760                                        MatSetFromOptions_MPIAIJ,
2761                                        NULL,
2762                                        NULL,
2763                                        MatFindZeroDiagonals_MPIAIJ,
2764                                 /*80*/ NULL,
2765                                        NULL,
2766                                        NULL,
2767                                 /*83*/ MatLoad_MPIAIJ,
2768                                        MatIsSymmetric_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                        NULL,
2773                                 /*89*/ NULL,
2774                                        NULL,
2775                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        MatBindToCPU_MPIAIJ,
2783                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        MatConjugate_MPIAIJ,
2787                                        NULL,
2788                                 /*104*/MatSetValuesRow_MPIAIJ,
2789                                        MatRealPart_MPIAIJ,
2790                                        MatImaginaryPart_MPIAIJ,
2791                                        NULL,
2792                                        NULL,
2793                                 /*109*/NULL,
2794                                        NULL,
2795                                        MatGetRowMin_MPIAIJ,
2796                                        NULL,
2797                                        MatMissingDiagonal_MPIAIJ,
2798                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2799                                        NULL,
2800                                        MatGetGhosts_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        MatGetMultiProcBlock_MPIAIJ,
2808                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2809                                        MatGetColumnReductions_MPIAIJ,
2810                                        MatInvertBlockDiagonal_MPIAIJ,
2811                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2812                                        MatCreateSubMatricesMPI_MPIAIJ,
2813                                 /*129*/NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2817                                        NULL,
2818                                 /*134*/NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        NULL,
2823                                 /*139*/MatSetBlockSizes_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatFDColoringSetUp_MPIXAIJ,
2827                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2828                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2829                                 /*145*/NULL,
2830                                        NULL,
2831                                        NULL,
2832                                        MatCreateGraph_Simple_AIJ,
2833                                        MatFilter_AIJ
2834 };
2835 
2836 /* ----------------------------------------------------------------------------------------*/
2837 
2838 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2839 {
2840   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2841 
2842   PetscFunctionBegin;
2843   PetscCall(MatStoreValues(aij->A));
2844   PetscCall(MatStoreValues(aij->B));
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2849 {
2850   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2851 
2852   PetscFunctionBegin;
2853   PetscCall(MatRetrieveValues(aij->A));
2854   PetscCall(MatRetrieveValues(aij->B));
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2859 {
2860   Mat_MPIAIJ     *b;
2861   PetscMPIInt    size;
2862 
2863   PetscFunctionBegin;
2864   PetscCall(PetscLayoutSetUp(B->rmap));
2865   PetscCall(PetscLayoutSetUp(B->cmap));
2866   b = (Mat_MPIAIJ*)B->data;
2867 
2868 #if defined(PETSC_USE_CTABLE)
2869   PetscCall(PetscTableDestroy(&b->colmap));
2870 #else
2871   PetscCall(PetscFree(b->colmap));
2872 #endif
2873   PetscCall(PetscFree(b->garray));
2874   PetscCall(VecDestroy(&b->lvec));
2875   PetscCall(VecScatterDestroy(&b->Mvctx));
2876 
2877   /* Because b->B may have been resized we simply destroy it and create a new one each time */
2878   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2879   PetscCall(MatDestroy(&b->B));
2880   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2881   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2882   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
2883   PetscCall(MatSetType(b->B,MATSEQAIJ));
2884   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2885 
2886   if (!B->preallocated) {
2887     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2888     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2889     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
2890     PetscCall(MatSetType(b->A,MATSEQAIJ));
2891     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2892   }
2893 
2894   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
2895   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2896   B->preallocated  = PETSC_TRUE;
2897   B->was_assembled = PETSC_FALSE;
2898   B->assembled     = PETSC_FALSE;
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2903 {
2904   Mat_MPIAIJ     *b;
2905 
2906   PetscFunctionBegin;
2907   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2908   PetscCall(PetscLayoutSetUp(B->rmap));
2909   PetscCall(PetscLayoutSetUp(B->cmap));
2910   b = (Mat_MPIAIJ*)B->data;
2911 
2912 #if defined(PETSC_USE_CTABLE)
2913   PetscCall(PetscTableDestroy(&b->colmap));
2914 #else
2915   PetscCall(PetscFree(b->colmap));
2916 #endif
2917   PetscCall(PetscFree(b->garray));
2918   PetscCall(VecDestroy(&b->lvec));
2919   PetscCall(VecScatterDestroy(&b->Mvctx));
2920 
2921   PetscCall(MatResetPreallocation(b->A));
2922   PetscCall(MatResetPreallocation(b->B));
2923   B->preallocated  = PETSC_TRUE;
2924   B->was_assembled = PETSC_FALSE;
2925   B->assembled = PETSC_FALSE;
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2930 {
2931   Mat            mat;
2932   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2933 
2934   PetscFunctionBegin;
2935   *newmat = NULL;
2936   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2937   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2938   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
2939   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2940   a       = (Mat_MPIAIJ*)mat->data;
2941 
2942   mat->factortype   = matin->factortype;
2943   mat->assembled    = matin->assembled;
2944   mat->insertmode   = NOT_SET_VALUES;
2945   mat->preallocated = matin->preallocated;
2946 
2947   a->size         = oldmat->size;
2948   a->rank         = oldmat->rank;
2949   a->donotstash   = oldmat->donotstash;
2950   a->roworiented  = oldmat->roworiented;
2951   a->rowindices   = NULL;
2952   a->rowvalues    = NULL;
2953   a->getrowactive = PETSC_FALSE;
2954 
2955   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2956   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2957 
2958   if (oldmat->colmap) {
2959 #if defined(PETSC_USE_CTABLE)
2960     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2961 #else
2962     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
2963     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
2964     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2965 #endif
2966   } else a->colmap = NULL;
2967   if (oldmat->garray) {
2968     PetscInt len;
2969     len  = oldmat->B->cmap->n;
2970     PetscCall(PetscMalloc1(len+1,&a->garray));
2971     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2972     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2973   } else a->garray = NULL;
2974 
2975   /* MatDuplicate() may be called with a non-assembled matrix;
2976      in fact, MatDuplicate() only requires the matrix to be preallocated.
2977      This can happen, for example, inside DMCreateMatrix_Shell() */
2978   if (oldmat->lvec) {
2979     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2980     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2981   }
2982   if (oldmat->Mvctx) {
2983     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2984     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2985   }
2986   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2987   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2988   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2989   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2990   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2991   *newmat = mat;
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2996 {
2997   PetscBool      isbinary, ishdf5;
2998 
2999   PetscFunctionBegin;
3000   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3001   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3002   /* force binary viewer to load .info file if it has not yet done so */
3003   PetscCall(PetscViewerSetUp(viewer));
3004   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3005   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
3006   if (isbinary) {
3007     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
3008   } else if (ishdf5) {
3009 #if defined(PETSC_HAVE_HDF5)
3010     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
3011 #else
3012     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3013 #endif
3014   } else {
3015     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3016   }
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3021 {
3022   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3023   PetscInt       *rowidxs,*colidxs;
3024   PetscScalar    *matvals;
3025 
3026   PetscFunctionBegin;
3027   PetscCall(PetscViewerSetUp(viewer));
3028 
3029   /* read in matrix header */
3030   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3031   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3032   M  = header[1]; N = header[2]; nz = header[3];
3033   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3034   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3035   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3036 
3037   /* set block sizes from the viewer's .info file */
3038   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3039   /* set global sizes if not set already */
3040   if (mat->rmap->N < 0) mat->rmap->N = M;
3041   if (mat->cmap->N < 0) mat->cmap->N = N;
3042   PetscCall(PetscLayoutSetUp(mat->rmap));
3043   PetscCall(PetscLayoutSetUp(mat->cmap));
3044 
3045   /* check if the matrix sizes are correct */
3046   PetscCall(MatGetSize(mat,&rows,&cols));
3047   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3048 
3049   /* read in row lengths and build row indices */
3050   PetscCall(MatGetLocalSize(mat,&m,NULL));
3051   PetscCall(PetscMalloc1(m+1,&rowidxs));
3052   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3053   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3054   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3055   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3056   /* read in column indices and matrix values */
3057   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3058   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3059   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3060   /* store matrix indices and values */
3061   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3062   PetscCall(PetscFree(rowidxs));
3063   PetscCall(PetscFree2(colidxs,matvals));
3064   PetscFunctionReturn(0);
3065 }
3066 
3067 /* Not scalable because of ISAllGather() unless getting all columns. */
3068 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3069 {
3070   IS             iscol_local;
3071   PetscBool      isstride;
3072   PetscMPIInt    lisstride=0,gisstride;
3073 
3074   PetscFunctionBegin;
3075   /* check if we are grabbing all columns */
3076   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3077 
3078   if (isstride) {
3079     PetscInt  start,len,mstart,mlen;
3080     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3081     PetscCall(ISGetLocalSize(iscol,&len));
3082     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3083     if (mstart == start && mlen-mstart == len) lisstride = 1;
3084   }
3085 
3086   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3087   if (gisstride) {
3088     PetscInt N;
3089     PetscCall(MatGetSize(mat,NULL,&N));
3090     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3091     PetscCall(ISSetIdentity(iscol_local));
3092     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3093   } else {
3094     PetscInt cbs;
3095     PetscCall(ISGetBlockSize(iscol,&cbs));
3096     PetscCall(ISAllGather(iscol,&iscol_local));
3097     PetscCall(ISSetBlockSize(iscol_local,cbs));
3098   }
3099 
3100   *isseq = iscol_local;
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 /*
3105  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid calling ISAllGather() and forming an iscol_local with the global size of iscol
3106  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3107 
3108  Input Parameters:
3109    mat - matrix
3110    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3111            i.e., mat->rstart <= isrow[i] < mat->rend
3112    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3113            i.e., mat->cstart <= iscol[i] < mat->cend
3114  Output Parameters:
3115    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3116    iscol_o - sequential column index set for retrieving mat->B
3117    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3118  */
3119 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3120 {
3121   Vec            x,cmap;
3122   const PetscInt *is_idx;
3123   PetscScalar    *xarray,*cmaparray;
3124   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3125   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3126   Mat            B=a->B;
3127   Vec            lvec=a->lvec,lcmap;
3128   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3129   MPI_Comm       comm;
3130   VecScatter     Mvctx=a->Mvctx;
3131 
3132   PetscFunctionBegin;
3133   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3134   PetscCall(ISGetLocalSize(iscol,&ncols));
3135 
3136   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3137   PetscCall(MatCreateVecs(mat,&x,NULL));
3138   PetscCall(VecSet(x,-1.0));
3139   PetscCall(VecDuplicate(x,&cmap));
3140   PetscCall(VecSet(cmap,-1.0));
3141 
3142   /* Get start indices */
3143   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3144   isstart -= ncols;
3145   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3146 
3147   PetscCall(ISGetIndices(iscol,&is_idx));
3148   PetscCall(VecGetArray(x,&xarray));
3149   PetscCall(VecGetArray(cmap,&cmaparray));
3150   PetscCall(PetscMalloc1(ncols,&idx));
3151   for (i=0; i<ncols; i++) {
3152     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3153     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3154     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3155   }
3156   PetscCall(VecRestoreArray(x,&xarray));
3157   PetscCall(VecRestoreArray(cmap,&cmaparray));
3158   PetscCall(ISRestoreIndices(iscol,&is_idx));
3159 
3160   /* Get iscol_d */
3161   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
3162   PetscCall(ISGetBlockSize(iscol,&i));
3163   PetscCall(ISSetBlockSize(*iscol_d,i));
3164 
3165   /* Get isrow_d */
3166   PetscCall(ISGetLocalSize(isrow,&m));
3167   rstart = mat->rmap->rstart;
3168   PetscCall(PetscMalloc1(m,&idx));
3169   PetscCall(ISGetIndices(isrow,&is_idx));
3170   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3171   PetscCall(ISRestoreIndices(isrow,&is_idx));
3172 
3173   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
3174   PetscCall(ISGetBlockSize(isrow,&i));
3175   PetscCall(ISSetBlockSize(*isrow_d,i));
3176 
3177   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3178   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3179   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3180 
3181   PetscCall(VecDuplicate(lvec,&lcmap));
3182 
3183   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3184   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3185 
3186   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3187   /* off-process column indices */
3188   count = 0;
3189   PetscCall(PetscMalloc1(Bn,&idx));
3190   PetscCall(PetscMalloc1(Bn,&cmap1));
3191 
3192   PetscCall(VecGetArray(lvec,&xarray));
3193   PetscCall(VecGetArray(lcmap,&cmaparray));
3194   for (i=0; i<Bn; i++) {
3195     if (PetscRealPart(xarray[i]) > -1.0) {
3196       idx[count]     = i;                   /* local column index in off-diagonal part B */
3197       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3198       count++;
3199     }
3200   }
3201   PetscCall(VecRestoreArray(lvec,&xarray));
3202   PetscCall(VecRestoreArray(lcmap,&cmaparray));
3203 
3204   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3205   /* cannot ensure iscol_o has same blocksize as iscol! */
3206 
3207   PetscCall(PetscFree(idx));
3208   *garray = cmap1;
3209 
3210   PetscCall(VecDestroy(&x));
3211   PetscCall(VecDestroy(&cmap));
3212   PetscCall(VecDestroy(&lcmap));
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3217 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3218 {
3219   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3220   Mat            M = NULL;
3221   MPI_Comm       comm;
3222   IS             iscol_d,isrow_d,iscol_o;
3223   Mat            Asub = NULL,Bsub = NULL;
3224   PetscInt       n;
3225 
3226   PetscFunctionBegin;
3227   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3228 
3229   if (call == MAT_REUSE_MATRIX) {
3230     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3231     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
3232     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3233 
3234     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
3235     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3236 
3237     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
3238     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3239 
3240     /* Update diagonal and off-diagonal portions of submat */
3241     asub = (Mat_MPIAIJ*)(*submat)->data;
3242     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
3243     PetscCall(ISGetLocalSize(iscol_o,&n));
3244     if (n) {
3245       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
3246     }
3247     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
3248     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
3249 
3250   } else { /* call == MAT_INITIAL_MATRIX) */
3251     const PetscInt *garray;
3252     PetscInt        BsubN;
3253 
3254     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3255     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
3256 
3257     /* Create local submatrices Asub and Bsub */
3258     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
3259     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
3260 
3261     /* Create submatrix M */
3262     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
3263 
3264     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3265     asub = (Mat_MPIAIJ*)M->data;
3266 
3267     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3268     n = asub->B->cmap->N;
3269     if (BsubN > n) {
3270       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3271       const PetscInt *idx;
3272       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3273       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
3274 
3275       PetscCall(PetscMalloc1(n,&idx_new));
3276       j = 0;
3277       PetscCall(ISGetIndices(iscol_o,&idx));
3278       for (i=0; i<n; i++) {
3279         if (j >= BsubN) break;
3280         while (subgarray[i] > garray[j]) j++;
3281 
3282         if (subgarray[i] == garray[j]) {
3283           idx_new[i] = idx[j++];
3284         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3285       }
3286       PetscCall(ISRestoreIndices(iscol_o,&idx));
3287 
3288       PetscCall(ISDestroy(&iscol_o));
3289       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
3290 
3291     } else if (BsubN < n) {
3292       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3293     }
3294 
3295     PetscCall(PetscFree(garray));
3296     *submat = M;
3297 
3298     /* Save isrow_d, iscol_d and iscol_o, to be reused by this process on the next request */
3299     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
3300     PetscCall(ISDestroy(&isrow_d));
3301 
3302     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
3303     PetscCall(ISDestroy(&iscol_d));
3304 
3305     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
3306     PetscCall(ISDestroy(&iscol_o));
3307   }
3308   PetscFunctionReturn(0);
3309 }
3310 
3311 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3312 {
3313   IS             iscol_local=NULL,isrow_d;
3314   PetscInt       csize;
3315   PetscInt       n,i,j,start,end;
3316   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3317   MPI_Comm       comm;
3318 
3319   PetscFunctionBegin;
3320   /* If isrow has same processor distribution as mat,
3321      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3322   if (call == MAT_REUSE_MATRIX) {
3323     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3324     if (isrow_d) {
3325       sameRowDist  = PETSC_TRUE;
3326       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3327     } else {
3328       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3329       if (iscol_local) {
3330         sameRowDist  = PETSC_TRUE;
3331         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3332       }
3333     }
3334   } else {
3335     /* Check if isrow has same processor distribution as mat */
3336     sameDist[0] = PETSC_FALSE;
3337     PetscCall(ISGetLocalSize(isrow,&n));
3338     if (!n) {
3339       sameDist[0] = PETSC_TRUE;
3340     } else {
3341       PetscCall(ISGetMinMax(isrow,&i,&j));
3342       PetscCall(MatGetOwnershipRange(mat,&start,&end));
3343       if (i >= start && j < end) {
3344         sameDist[0] = PETSC_TRUE;
3345       }
3346     }
3347 
3348     /* Check if iscol has same processor distribution as mat */
3349     sameDist[1] = PETSC_FALSE;
3350     PetscCall(ISGetLocalSize(iscol,&n));
3351     if (!n) {
3352       sameDist[1] = PETSC_TRUE;
3353     } else {
3354       PetscCall(ISGetMinMax(iscol,&i,&j));
3355       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
3356       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3357     }
3358 
3359     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3360     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
3361     sameRowDist = tsameDist[0];
3362   }
3363 
3364   if (sameRowDist) {
3365     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3366       /* isrow and iscol have same processor distribution as mat */
3367       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
3368       PetscFunctionReturn(0);
3369     } else { /* sameRowDist */
3370       /* isrow has same processor distribution as mat */
3371       if (call == MAT_INITIAL_MATRIX) {
3372         PetscBool sorted;
3373         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3374         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
3375         PetscCall(ISGetSize(iscol,&i));
3376         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3377 
3378         PetscCall(ISSorted(iscol_local,&sorted));
3379         if (sorted) {
3380           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3381           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
3382           PetscFunctionReturn(0);
3383         }
3384       } else { /* call == MAT_REUSE_MATRIX */
3385         IS iscol_sub;
3386         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3387         if (iscol_sub) {
3388           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
3389           PetscFunctionReturn(0);
3390         }
3391       }
3392     }
3393   }
3394 
3395   /* General case: iscol -> iscol_local which has global size of iscol */
3396   if (call == MAT_REUSE_MATRIX) {
3397     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
3398     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3399   } else {
3400     if (!iscol_local) {
3401       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3402     }
3403   }
3404 
3405   PetscCall(ISGetLocalSize(iscol,&csize));
3406   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
3407 
3408   if (call == MAT_INITIAL_MATRIX) {
3409     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
3410     PetscCall(ISDestroy(&iscol_local));
3411   }
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 /*@C
3416    MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3417    and "off-diagonal" parts of the matrix in CSR format.
3418 
3419    Collective
3420 
3421    Input Parameters:
3422 +  comm - MPI communicator
3423 .  A - "diagonal" portion of matrix
3424 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3425 -  garray - global index of B columns
3426 
3427    Output Parameter:
3428 .   mat - the matrix, with input A as its local diagonal matrix

3429    Level: advanced
3430 
3431    Notes:
3432        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3433        A becomes part of the output mat, and B is destroyed by this routine; the user may not use A or B afterwards.
3434 
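   Example usage (a minimal sketch, assuming Aloc and Bloc are assembled MATSEQAIJ matrices built by the caller and garray holds the global column indices of Bloc's columns):
$     Mat C;
$     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C));
$     /* Aloc is now owned by C and Bloc has been destroyed; neither may be used afterwards */
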
3435 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3436 @*/
3437 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3438 {
3439   Mat_MPIAIJ        *maij;
3440   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3441   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3442   const PetscScalar *oa;
3443   Mat               Bnew;
3444   PetscInt          m,n,N;
3445   MatType           mpi_mat_type;
3446 
3447   PetscFunctionBegin;
3448   PetscCall(MatCreate(comm,mat));
3449   PetscCall(MatGetSize(A,&m,&n));
3450   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3451   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3452   /* the check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its column bs may not be the same as A's */
3453   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3454 
3455   /* Get global columns of mat */
3456   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3457 
3458   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
3459   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3460   PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
3461   PetscCall(MatSetType(*mat,mpi_mat_type));
3462 
3463   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3464   maij = (Mat_MPIAIJ*)(*mat)->data;
3465 
3466   (*mat)->preallocated = PETSC_TRUE;
3467 
3468   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3469   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3470 
3471   /* Set A as diagonal portion of *mat */
3472   maij->A = A;
3473 
3474   nz = oi[m];
3475   for (i=0; i<nz; i++) {
3476     col   = oj[i];
3477     oj[i] = garray[col];
3478   }
3479 
3480   /* Set Bnew as off-diagonal portion of *mat */
3481   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
3482   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
3483   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3484   bnew        = (Mat_SeqAIJ*)Bnew->data;
3485   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3486   maij->B     = Bnew;
3487 
3488   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3489 
3490   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3491   b->free_a       = PETSC_FALSE;
3492   b->free_ij      = PETSC_FALSE;
3493   PetscCall(MatDestroy(&B));
3494 
3495   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3496   bnew->free_a       = PETSC_TRUE;
3497   bnew->free_ij      = PETSC_TRUE;
3498 
3499   /* condense columns of maij->B */
3500   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3501   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3502   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3503   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
3504   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3509 
3510 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3511 {
3512   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3513   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3514   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3515   Mat            M,Msub,B=a->B;
3516   MatScalar      *aa;
3517   Mat_SeqAIJ     *aij;
3518   PetscInt       *garray = a->garray,*colsub,Ncols;
3519   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3520   IS             iscol_sub,iscmap;
3521   const PetscInt *is_idx,*cmap;
3522   PetscBool      allcolumns=PETSC_FALSE;
3523   MPI_Comm       comm;
3524 
3525   PetscFunctionBegin;
3526   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3527   if (call == MAT_REUSE_MATRIX) {
3528     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3529     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3530     PetscCall(ISGetLocalSize(iscol_sub,&count));
3531 
3532     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3533     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3534 
3535     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3536     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3537 
3538     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3539 
3540   } else { /* call == MAT_INITIAL_MATRIX) */
3541     PetscBool flg;
3542 
3543     PetscCall(ISGetLocalSize(iscol,&n));
3544     PetscCall(ISGetSize(iscol,&Ncols));
3545 
3546     /* (1) iscol -> nonscalable iscol_local */
3547     /* Check for special case: each processor gets entire matrix columns */
3548     PetscCall(ISIdentity(iscol_local,&flg));
3549     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3550     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3551     if (allcolumns) {
3552       iscol_sub = iscol_local;
3553       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3554       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3555 
3556     } else {
3557       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3558       PetscInt *idx,*cmap1,k;
3559       PetscCall(PetscMalloc1(Ncols,&idx));
3560       PetscCall(PetscMalloc1(Ncols,&cmap1));
3561       PetscCall(ISGetIndices(iscol_local,&is_idx));
3562       count = 0;
3563       k     = 0;
3564       for (i=0; i<Ncols; i++) {
3565         j = is_idx[i];
3566         if (j >= cstart && j < cend) {
3567           /* diagonal part of mat */
3568           idx[count]     = j;
3569           cmap1[count++] = i; /* column index in submat */
3570         } else if (Bn) {
3571           /* off-diagonal part of mat */
3572           if (j == garray[k]) {
3573             idx[count]     = j;
3574             cmap1[count++] = i;  /* column index in submat */
3575           } else if (j > garray[k]) {
3576             while (j > garray[k] && k < Bn-1) k++;
3577             if (j == garray[k]) {
3578               idx[count]     = j;
3579               cmap1[count++] = i; /* column index in submat */
3580             }
3581           }
3582         }
3583       }
3584       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3585 
3586       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3587       PetscCall(ISGetBlockSize(iscol,&cbs));
3588       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3589 
3590       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3591     }
3592 
3593     /* (3) Create sequential Msub */
3594     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3595   }
3596 
3597   PetscCall(ISGetLocalSize(iscol_sub,&count));
3598   aij  = (Mat_SeqAIJ*)(Msub)->data;
3599   ii   = aij->i;
3600   PetscCall(ISGetIndices(iscmap,&cmap));
3601 
3602   /*
3603       m - number of local rows
3604       Ncols - number of columns (same on all processors)
3605       rstart - first row in new global matrix generated
3606   */
3607   PetscCall(MatGetSize(Msub,&m,NULL));
3608 
3609   if (call == MAT_INITIAL_MATRIX) {
3610     /* (4) Create parallel newmat */
3611     PetscMPIInt    rank,size;
3612     PetscInt       csize;
3613 
3614     PetscCallMPI(MPI_Comm_size(comm,&size));
3615     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3616 
3617     /*
3618         Determine the number of non-zeros in the diagonal and off-diagonal
3619         portions of the matrix in order to do correct preallocation
3620     */
3621 
3622     /* first get start and end of "diagonal" columns */
3623     PetscCall(ISGetLocalSize(iscol,&csize));
3624     if (csize == PETSC_DECIDE) {
3625       PetscCall(ISGetSize(isrow,&mglobal));
3626       if (mglobal == Ncols) { /* square matrix */
3627         nlocal = m;
3628       } else {
3629         nlocal = Ncols/size + ((Ncols % size) > rank);
3630       }
3631     } else {
3632       nlocal = csize;
3633     }
3634     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3635     rstart = rend - nlocal;
3636     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3637 
3638     /* next, compute all the lengths */
3639     jj    = aij->j;
3640     PetscCall(PetscMalloc1(2*m+1,&dlens));
3641     olens = dlens + m;
3642     for (i=0; i<m; i++) {
3643       jend = ii[i+1] - ii[i];
3644       olen = 0;
3645       dlen = 0;
3646       for (j=0; j<jend; j++) {
3647         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3648         else dlen++;
3649         jj++;
3650       }
3651       olens[i] = olen;
3652       dlens[i] = dlen;
3653     }
3654 
3655     PetscCall(ISGetBlockSize(isrow,&bs));
3656     PetscCall(ISGetBlockSize(iscol,&cbs));
3657 
3658     PetscCall(MatCreate(comm,&M));
3659     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3660     PetscCall(MatSetBlockSizes(M,bs,cbs));
3661     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3662     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3663     PetscCall(PetscFree(dlens));
3664 
3665   } else { /* call == MAT_REUSE_MATRIX */
3666     M    = *newmat;
3667     PetscCall(MatGetLocalSize(M,&i,NULL));
3668     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3669     PetscCall(MatZeroEntries(M));
3670     /*
3671          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3672        rather than the slower MatSetValues().
3673     */
3674     M->was_assembled = PETSC_TRUE;
3675     M->assembled     = PETSC_FALSE;
3676   }
3677 
3678   /* (5) Set values of Msub to *newmat */
3679   PetscCall(PetscMalloc1(count,&colsub));
3680   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3681 
3682   jj   = aij->j;
3683   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3684   for (i=0; i<m; i++) {
3685     row = rstart + i;
3686     nz  = ii[i+1] - ii[i];
3687     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3688     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3689     jj += nz; aa += nz;
3690   }
3691   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3692   PetscCall(ISRestoreIndices(iscmap,&cmap));
3693 
3694   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3695   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3696 
3697   PetscCall(PetscFree(colsub));
3698 
3699   /* save Msub, iscol_sub and iscmap, to be reused by this process on the next request */
3700   if (call == MAT_INITIAL_MATRIX) {
3701     *newmat = M;
3702     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3703     PetscCall(MatDestroy(&Msub));
3704 
3705     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3706     PetscCall(ISDestroy(&iscol_sub));
3707 
3708     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3709     PetscCall(ISDestroy(&iscmap));
3710 
3711     if (iscol_local) {
3712       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3713       PetscCall(ISDestroy(&iscol_local));
3714     }
3715   }
3716   PetscFunctionReturn(0);
3717 }
3718 
3719 /*
3720     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3721   copy, and then the final result obtained by concatenating the local matrices.
3722   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3723 
3724   Note: This requires a sequential iscol with all indices.
3725 */
3726 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3727 {
3728   PetscMPIInt    rank,size;
3729   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3730   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3731   Mat            M,Mreuse;
3732   MatScalar      *aa,*vwork;
3733   MPI_Comm       comm;
3734   Mat_SeqAIJ     *aij;
3735   PetscBool      colflag,allcolumns=PETSC_FALSE;
3736 
3737   PetscFunctionBegin;
3738   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3739   PetscCallMPI(MPI_Comm_rank(comm,&rank));
3740   PetscCallMPI(MPI_Comm_size(comm,&size));
3741 
3742   /* Check for special case: each processor gets entire matrix columns */
3743   PetscCall(ISIdentity(iscol,&colflag));
3744   PetscCall(ISGetLocalSize(iscol,&n));
3745   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3746   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3747 
3748   if (call ==  MAT_REUSE_MATRIX) {
3749     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3750     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3751     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3752   } else {
3753     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3754   }
3755 
3756   /*
3757       m - number of local rows
3758       n - number of columns (same on all processors)
3759       rstart - first row in new global matrix generated
3760   */
3761   PetscCall(MatGetSize(Mreuse,&m,&n));
3762   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3763   if (call == MAT_INITIAL_MATRIX) {
3764     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3765     ii  = aij->i;
3766     jj  = aij->j;
3767 
3768     /*
3769         Determine the number of non-zeros in the diagonal and off-diagonal
3770         portions of the matrix in order to do correct preallocation
3771     */
3772 
3773     /* first get start and end of "diagonal" columns */
3774     if (csize == PETSC_DECIDE) {
3775       PetscCall(ISGetSize(isrow,&mglobal));
3776       if (mglobal == n) { /* square matrix */
3777         nlocal = m;
3778       } else {
3779         nlocal = n/size + ((n % size) > rank);
3780       }
3781     } else {
3782       nlocal = csize;
3783     }
3784     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3785     rstart = rend - nlocal;
3786     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3787 
3788     /* next, compute all the lengths */
3789     PetscCall(PetscMalloc1(2*m+1,&dlens));
3790     olens = dlens + m;
3791     for (i=0; i<m; i++) {
3792       jend = ii[i+1] - ii[i];
3793       olen = 0;
3794       dlen = 0;
3795       for (j=0; j<jend; j++) {
3796         if (*jj < rstart || *jj >= rend) olen++;
3797         else dlen++;
3798         jj++;
3799       }
3800       olens[i] = olen;
3801       dlens[i] = dlen;
3802     }
3803     PetscCall(MatCreate(comm,&M));
3804     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3805     PetscCall(MatSetBlockSizes(M,bs,cbs));
3806     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3807     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3808     PetscCall(PetscFree(dlens));
3809   } else {
3810     PetscInt ml,nl;
3811 
3812     M    = *newmat;
3813     PetscCall(MatGetLocalSize(M,&ml,&nl));
3814     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3815     PetscCall(MatZeroEntries(M));
3816     /*
3817          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3818        rather than the slower MatSetValues().
3819     */
3820     M->was_assembled = PETSC_TRUE;
3821     M->assembled     = PETSC_FALSE;
3822   }
3823   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3824   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3825   ii   = aij->i;
3826   jj   = aij->j;
3827 
3828   /* trigger copy to CPU if needed */
3829   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3830   for (i=0; i<m; i++) {
3831     row   = rstart + i;
3832     nz    = ii[i+1] - ii[i];
3833     cwork = jj; jj += nz;
3834     vwork = aa; aa += nz;
3835     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3836   }
3837   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3838 
3839   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3840   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3841   *newmat = M;
3842 
3843   /* save submatrix, to be reused by this process on the next request */
3844   if (call ==  MAT_INITIAL_MATRIX) {
3845     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3846     PetscCall(MatDestroy(&Mreuse));
3847   }
3848   PetscFunctionReturn(0);
3849 }
3850 
3851 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3852 {
3853   PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
3854   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3855   const PetscInt *JJ;
3856   PetscBool      nooffprocentries;
3857   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;
3858 
3859   PetscFunctionBegin;
3860   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %" PetscInt_FMT,Ii[0]);
3861 
3862   PetscCall(PetscLayoutSetUp(B->rmap));
3863   PetscCall(PetscLayoutSetUp(B->cmap));
3864   m      = B->rmap->n;
3865   cstart = B->cmap->rstart;
3866   cend   = B->cmap->rend;
3867   rstart = B->rmap->rstart;
3868 
3869   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3870 
3871   if (PetscDefined(USE_DEBUG)) {
3872     for (i=0; i<m; i++) {
3873       nnz = Ii[i+1]- Ii[i];
3874       JJ  = J + Ii[i];
3875       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nnz);
3876       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3877       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3878     }
3879   }
3880 
3881   for (i=0; i<m; i++) {
3882     nnz     = Ii[i+1]- Ii[i];
3883     JJ      = J + Ii[i];
3884     nnz_max = PetscMax(nnz_max,nnz);
3885     d       = 0;
3886     for (j=0; j<nnz; j++) {
3887       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3888     }
3889     d_nnz[i] = d;
3890     o_nnz[i] = nnz - d;
3891   }
3892   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3893   PetscCall(PetscFree2(d_nnz,o_nnz));
3894 
3895   for (i=0; i<m; i++) {
3896     ii   = i + rstart;
3897     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3898   }
3899   nooffprocentries    = B->nooffprocentries;
3900   B->nooffprocentries = PETSC_TRUE;
3901   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3902   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3903   B->nooffprocentries = nooffprocentries;
3904 
3905   /* count number of entries below block diagonal */
3906   PetscCall(PetscFree(Aij->ld));
3907   PetscCall(PetscCalloc1(m,&ld));
3908   Aij->ld = ld;
3909   for (i=0; i<m; i++) {
3910     nnz  = Ii[i+1] - Ii[i];
3911     j     = 0;
3912     while  (j < nnz && J[j] < cstart) {j++;}
3913     ld[i] = j;
3914     J     += nnz;
3915   }
3916 
3917   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3918   PetscFunctionReturn(0);
3919 }
3920 
3921 /*@
3922    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3923    (the default parallel PETSc format).
3924 
3925    Collective
3926 
3927    Input Parameters:
3928 +  B - the matrix
3929 .  i - the indices into j for the start of each local row (starts with zero)
3930 .  j - the column indices for each local row (starts with zero)
3931 -  v - optional values in the matrix
3932 
3933    Level: developer
3934 
3935    Notes:
3936        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3937      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3938      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3939 
3940        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3941 
3942        The format used for the sparse matrix input is equivalent to a
3943     row-major ordering, i.e., for the following matrix the input data expected is
3944     as shown below.
3945 
3946 $        1 0 0
3947 $        2 0 3     P0
3948 $       -------
3949 $        4 5 6     P1
3950 $
3951 $     Process0 [P0]: rows_owned=[0,1]
3952 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3953 $        j =  {0,0,2}  [size = 3]
3954 $        v =  {1,2,3}  [size = 3]
3955 $
3956 $     Process1 [P1]: rows_owned=[2]
3957 $        i =  {0,3}    [size = nrow+1  = 1+1]
3958 $        j =  {0,1,2}  [size = 3]
3959 $        v =  {4,5,6}  [size = 3]
3960 
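
   A minimal sketch (assuming a two-process communicator, with rank obtained from MPI_Comm_rank(), and B already sized so that rank 0 owns rows 0-1 and rank 1 owns row 2) of passing the arrays from the example above:

.vb
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* rank 0 CSR data */
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* rank 1 CSR data */
     PetscScalar v1[] = {4,5,6};

     if (rank == 0) PetscCall(MatMPIAIJSetPreallocationCSR(B,i0,j0,v0));
     else           PetscCall(MatMPIAIJSetPreallocationCSR(B,i1,j1,v1));
.ve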
3961 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3962           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3963 @*/
3964 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3965 {
3966   PetscFunctionBegin;
3967   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3968   PetscFunctionReturn(0);
3969 }
3970 
3971 /*@C
3972    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3973    (the default parallel PETSc format).  For good matrix assembly performance
3974    the user should preallocate the matrix storage by setting the parameters
3975    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3976    performance can be increased by more than a factor of 50.
3977 
3978    Collective
3979 
3980    Input Parameters:
3981 +  B - the matrix
3982 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3983            (same value is used for all local rows)
3984 .  d_nnz - array containing the number of nonzeros in the various rows of the
3985            DIAGONAL portion of the local submatrix (possibly different for each row)
3986            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3987            The size of this array is equal to the number of local rows, i.e 'm'.
3988            For matrices that will be factored, you must leave room for (and set)
3989            the diagonal entry even if it is zero.
3990 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3991            submatrix (same value is used for all local rows).
3992 -  o_nnz - array containing the number of nonzeros in the various rows of the
3993            OFF-DIAGONAL portion of the local submatrix (possibly different for
3994            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3995            structure. The size of this array is equal to the number
3996            of local rows, i.e 'm'.
3997 
3998    If the *_nnz parameter is given then the *_nz parameter is ignored
3999 
4000    The AIJ format (also called the Yale sparse matrix format or
4001    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4002    storage.  The stored row and column indices begin with zero.
4003    See Users-Manual: ch_mat for details.
4004 
4005    The parallel matrix is partitioned such that the first m0 rows belong to
4006    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4007    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4008 
4009    The DIAGONAL portion of the local submatrix of a processor can be defined
4010    as the submatrix obtained by extracting the part corresponding to
4011    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4012    first row that belongs to the processor, r2 is the last row belonging to
4013    this processor, and c1-c2 is the range of indices of the local part of a
4014    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4015    common case of a square matrix, the row and column ranges are the same and
4016    the DIAGONAL part is also square. The remaining portion of the local
4017    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4018 
4019    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4020 
4021    You can call MatGetInfo() to get information on how effective the preallocation was;
4022    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4023    You can also run with the option -info and look for messages with the string
4024    malloc in them to see if additional memory allocation was needed.
4025 
4026    Example usage:
4027 
4028    Consider the following 8x8 matrix with 34 non-zero values, that is
4029    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4030    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4031    as follows:
4032 
4033 .vb
4034             1  2  0  |  0  3  0  |  0  4
4035     Proc0   0  5  6  |  7  0  0  |  8  0
4036             9  0 10  | 11  0  0  | 12  0
4037     -------------------------------------
4038            13  0 14  | 15 16 17  |  0  0
4039     Proc1   0 18  0  | 19 20 21  |  0  0
4040             0  0  0  | 22 23  0  | 24  0
4041     -------------------------------------
4042     Proc2  25 26 27  |  0  0 28  | 29  0
4043            30  0  0  | 31 32 33  |  0 34
4044 .ve
4045 
4046    This can be represented as a collection of submatrices as:
4047 
4048 .vb
4049       A B C
4050       D E F
4051       G H I
4052 .ve
4053 
4054    Where the submatrices A,B,C are owned by proc0, D,E,F are
4055    owned by proc1, G,H,I are owned by proc2.
4056 
4057    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4058    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4059    The 'M','N' parameters are 8,8, and have the same values on all procs.
4060 
4061    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4062    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4063    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4064    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4065    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4066    matrix, and [DF] as another SeqAIJ matrix.
4067 
4068    When d_nz, o_nz parameters are specified, d_nz storage elements are
4069    allocated for every row of the local diagonal submatrix, and o_nz
4070    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4071    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4072    row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4073    In this case, the values of d_nz,o_nz are:
4074 .vb
4075      proc0 : dnz = 2, o_nz = 2
4076      proc1 : dnz = 3, o_nz = 2
4077      proc2 : dnz = 1, o_nz = 4
4078 .ve
4079    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4080    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4081    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4082    34 values.
4083 
4084    When d_nnz, o_nnz parameters are specified, the storage is specified
4085    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4086    In the above case the values for d_nnz,o_nnz are:
4087 .vb
4088      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4089      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4090      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4091 .ve
4092    Here the space allocated is the sum of all the above values, i.e. 34, and
4093    hence pre-allocation is perfect.
4094 
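
   As a sketch, rank 0 in the example above might set its preallocation and later check it with MatGetInfo() as follows (assuming B has already been created with MatCreate(), MatSetSizes(B,3,3,8,8), and MatSetType(B,MATMPIAIJ)):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* per-row counts for rank 0 */
     MatInfo  info;

     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
     PetscCall(MatGetInfo(B,MAT_LOCAL,&info));
     /* info.mallocs is 0 when the preallocation was sufficient */
.ve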
4095    Level: intermediate
4096 
4097 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4098           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4099 @*/
4100 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4101 {
4102   PetscFunctionBegin;
4103   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4104   PetscValidType(B,1);
4105   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4106   PetscFunctionReturn(0);
4107 }
4108 
4109 /*@
4110      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4111          CSR format.
4112 
4113    Collective
4114 
4115    Input Parameters:
4116 +  comm - MPI communicator
4117 .  m - number of local rows (Cannot be PETSC_DECIDE)
4118 .  n - This value should be the same as the local size used in creating the
4119        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4120        calculated if N is given) For square matrices n is almost always m.
4121 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4122 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4123 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4124 .   j - column indices
4125 -   a - optional matrix values
4126 
4127    Output Parameter:
4128 .   mat - the matrix
4129 
4130    Level: intermediate
4131 
4132    Notes:
4133        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4134      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4135      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4136 
4137        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4138 
4139        The format used for the sparse matrix input is equivalent to a
4140     row-major ordering, i.e., for the following matrix the input data expected is
4141     as shown below.
4142 
4143        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4144 
4145 $        1 0 0
4146 $        2 0 3     P0
4147 $       -------
4148 $        4 5 6     P1
4149 $
4150 $     Process0 [P0]: rows_owned=[0,1]
4151 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4152 $        j =  {0,0,2}  [size = 3]
4153 $        v =  {1,2,3}  [size = 3]
4154 $
4155 $     Process1 [P1]: rows_owned=[2]
4156 $        i =  {0,3}    [size = nrow+1  = 1+1]
4157 $        j =  {0,1,2}  [size = 3]
4158 $        v =  {4,5,6}  [size = 3]
4159 
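
   A minimal sketch (assuming PETSC_COMM_WORLD has exactly two processes and rank holds the result of MPI_Comm_rank()) that builds the matrix from the example above:

.vb
     Mat         A;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* rank 0 CSR data */
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* rank 1 CSR data */
     PetscScalar v1[] = {4,5,6};
     PetscInt    m    = rank ? 1 : 2;

     PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,
                                         rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0,&A));
.ve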
4160 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4161           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4162 @*/
4163 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4164 {
4165   PetscFunctionBegin;
4166   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4167   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4168   PetscCall(MatCreate(comm,mat));
4169   PetscCall(MatSetSizes(*mat,m,n,M,N));
4170   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4171   PetscCall(MatSetType(*mat,MATMPIAIJ));
4172   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4173   PetscFunctionReturn(0);
4174 }
4175 
4176 /*@
4177      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4178          CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to MatCreateMPIAIJWithArrays()
4179 
4180      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4181 
4182    Collective
4183 
4184    Input Parameters:
4185 +  mat - the matrix
4186 .  m - number of local rows (Cannot be PETSC_DECIDE)
4187 .  n - This value should be the same as the local size used in creating the
4188        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4189        calculated if N is given) For square matrices n is almost always m.
4190 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4191 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4192 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4193 .  J - column indices
4194 -  v - matrix values
4195 
4196    Level: intermediate
4197 
4198 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4200 @*/
4201 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4202 {
4203   PetscInt       nnz,i;
4204   PetscBool      nooffprocentries;
4205   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4206   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4207   PetscScalar    *ad,*ao;
4208   PetscInt       ldi,Iii,md;
4209   const PetscInt *Adi = Ad->i;
4210   PetscInt       *ld = Aij->ld;
4211 
4212   PetscFunctionBegin;
4213   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4214   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4215   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4216   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4217 
4218   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4219   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4220 
4221   for (i=0; i<m; i++) {
4222     nnz  = Ii[i+1]- Ii[i];
4223     Iii  = Ii[i];
4224     ldi  = ld[i];
4225     md   = Adi[i+1]-Adi[i];
4226     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4227     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4228     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4229     ad  += md;
4230     ao  += nnz - md;
4231   }
4232   nooffprocentries      = mat->nooffprocentries;
4233   mat->nooffprocentries = PETSC_TRUE;
4234   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4235   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4236   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4237   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4238   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4239   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4240   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4241   mat->nooffprocentries = nooffprocentries;
4242   PetscFunctionReturn(0);
4243 }
4244 
4245 /*@
4246      MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values
4247 
4248    Collective
4249 
4250    Input Parameters:
4251 +  mat - the matrix
4252 -  v - matrix values, stored by row
4253 
4254    Level: intermediate
4255 
4256    Notes:
4257    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4258 
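
   A sketch of the intended usage pattern (assuming A was built with MatCreateMPIAIJWithArrays() and vnew holds this process's new nonzero values in the same CSR row order as at creation):

.vb
     for (step=0; step<nsteps; step++) {
       /* recompute the entries of vnew; the sparsity pattern must not change */
       PetscCall(MatUpdateMPIAIJWithArray(A,vnew));
       /* ... use A, e.g. in MatMult() or a solver ... */
     }
.ve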
4259 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4260           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4261 @*/
4262 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
4263 {
4264   PetscInt       nnz,i,m;
4265   PetscBool      nooffprocentries;
4266   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4267   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4268   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4269   PetscScalar    *ad,*ao;
4270   const PetscInt *Adi = Ad->i,*Adj = Ao->i;
4271   PetscInt       ldi,Iii,md;
4272   PetscInt       *ld = Aij->ld;
4273 
4274   PetscFunctionBegin;
4275   m = mat->rmap->n;
4276 
4277   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4278   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4279   Iii = 0;
4280   for (i=0; i<m; i++) {
4281     nnz  = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];
4282     ldi  = ld[i];
4283     md   = Adi[i+1]-Adi[i];
4284     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4285     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4286     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4287     ad  += md;
4288     ao  += nnz - md;
4289     Iii += nnz;
4290   }
4291   nooffprocentries      = mat->nooffprocentries;
4292   mat->nooffprocentries = PETSC_TRUE;
4293   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4294   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4295   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4296   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4297   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4298   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4299   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4300   mat->nooffprocentries = nooffprocentries;
4301   PetscFunctionReturn(0);
4302 }
4303 
4304 /*@C
4305    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4306    (the default parallel PETSc format).  For good matrix assembly performance
4307    the user should preallocate the matrix storage by setting the parameters
4308    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4309    performance can be increased by more than a factor of 50.
4310 
4311    Collective
4312 
4313    Input Parameters:
4314 +  comm - MPI communicator
4315 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4316            This value should be the same as the local size used in creating the
4317            y vector for the matrix-vector product y = Ax.
4318 .  n - This value should be the same as the local size used in creating the
4319        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4320        calculated if N is given) For square matrices n is almost always m.
4321 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4322 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4323 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4324            (same value is used for all local rows)
4325 .  d_nnz - array containing the number of nonzeros in the various rows of the
4326            DIAGONAL portion of the local submatrix (possibly different for each row)
4327            or NULL, if d_nz is used to specify the nonzero structure.
4328            The size of this array is equal to the number of local rows, i.e 'm'.
4329 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4330            submatrix (same value is used for all local rows).
4331 -  o_nnz - array containing the number of nonzeros in the various rows of the
4332            OFF-DIAGONAL portion of the local submatrix (possibly different for
4333            each row) or NULL, if o_nz is used to specify the nonzero
4334            structure. The size of this array is equal to the number
4335            of local rows, i.e 'm'.
4336 
4337    Output Parameter:
4338 .  A - the matrix
4339 
4340    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4341    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4342    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4343 
4344    Notes:
4345    If the *_nnz parameter is given then the *_nz parameter is ignored
4346 
4347    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4348    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4349    storage requirements for this matrix.
4350 
4351    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4352    processor then it must be used on all processors that share the object for
4353    that argument.
4354 
4355    The user MUST specify either the local or global matrix dimensions
4356    (possibly both).
4357 
4358    The parallel matrix is partitioned across processors such that the
4359    first m0 rows belong to process 0, the next m1 rows belong to
4360    process 1, the next m2 rows belong to process 2 etc.. where
4361    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4362    values corresponding to an [m x N] submatrix.
4363 
4364    The columns are logically partitioned with the n0 columns belonging
4365    to 0th partition, the next n1 columns belonging to the next
4366    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4367 
4368    The DIAGONAL portion of the local submatrix on any given processor
4369    is the submatrix corresponding to the rows and columns m,n owned by
4370    the given processor, i.e. the diagonal matrix on
4371    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4372    etc. The remaining portion of the local submatrix [m x (N-n)]
4373    constitute the OFF-DIAGONAL portion. The example below better
4374    illustrates this concept.
4375 
4376    For a square global matrix we define each processor's diagonal portion
4377    to be its local rows and the corresponding columns (a square submatrix);
4378    each processor's off-diagonal portion encompasses the remainder of the
4379    local matrix (a rectangular submatrix).
4380 
4381    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4382 
4383    When calling this routine with a single process communicator, a matrix of
4384    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4385    type of communicator, use the construction mechanism
4386 .vb
4387      MatCreate(...,&A);
4388      MatSetType(A,MATMPIAIJ);
4389      MatSetSizes(A, m,n,M,N);
4390      MatMPIAIJSetPreallocation(A,...);
4391 .ve
4394 
4395    By default, this format uses inodes (identical nodes) when possible.
4396    We search for consecutive rows with the same nonzero structure, thereby
4397    reusing matrix information to achieve increased efficiency.
4398 
4399    Options Database Keys:
4400 +  -mat_no_inode  - Do not use inodes
4401 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4402 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4403         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4404         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4405 
4406    Example usage:
4407 
4408    Consider the following 8x8 matrix with 34 non-zero values, that is
4409    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4410    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4411    as follows
4412 
4413 .vb
4414             1  2  0  |  0  3  0  |  0  4
4415     Proc0   0  5  6  |  7  0  0  |  8  0
4416             9  0 10  | 11  0  0  | 12  0
4417     -------------------------------------
4418            13  0 14  | 15 16 17  |  0  0
4419     Proc1   0 18  0  | 19 20 21  |  0  0
4420             0  0  0  | 22 23  0  | 24  0
4421     -------------------------------------
4422     Proc2  25 26 27  |  0  0 28  | 29  0
4423            30  0  0  | 31 32 33  |  0 34
4424 .ve
4425 
4426    This can be represented as a collection of submatrices as
4427 
4428 .vb
4429       A B C
4430       D E F
4431       G H I
4432 .ve
4433 
4434    Where the submatrices A,B,C are owned by proc0, D,E,F are
4435    owned by proc1, G,H,I are owned by proc2.
4436 
4437    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4438    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4439    The 'M','N' parameters are 8,8, and have the same values on all procs.
4440 
4441    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4442    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4443    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4444    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4445    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4446    matrix, and [DF] as another SeqAIJ matrix.
4447 
4448    When d_nz, o_nz parameters are specified, d_nz storage elements are
4449    allocated for every row of the local diagonal submatrix, and o_nz
4450    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4451    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4452    row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4453    In this case, the values of d_nz,o_nz are
4454 .vb
4455      proc0 : dnz = 2, o_nz = 2
4456      proc1 : dnz = 3, o_nz = 2
4457      proc2 : dnz = 1, o_nz = 4
4458 .ve
4459    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4460    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4461    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4462    34 values.
4463 
4464    When d_nnz, o_nnz parameters are specified, the storage is specified
4465    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4466    In the above case the values for d_nnz,o_nnz are
4467 .vb
4468      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4469      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4470      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4471 .ve
4472    Here the space allocated is the sum of all the above values, i.e. 34, and
4473    hence pre-allocation is perfect.
4474 
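
   As a sketch, rank 0 in the example above could create its share of the matrix directly as follows; each rank makes the same call with its own local sizes and its own d_nnz/o_nnz arrays:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* rank 0 per-row counts */
     PetscCall(MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A));
.ve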
4475    Level: intermediate
4476 
4477 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4478           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4479 @*/
4480 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4481 {
4482   PetscMPIInt    size;
4483 
4484   PetscFunctionBegin;
4485   PetscCall(MatCreate(comm,A));
4486   PetscCall(MatSetSizes(*A,m,n,M,N));
4487   PetscCallMPI(MPI_Comm_size(comm,&size));
4488   if (size > 1) {
4489     PetscCall(MatSetType(*A,MATMPIAIJ));
4490     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4491   } else {
4492     PetscCall(MatSetType(*A,MATSEQAIJ));
4493     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4494   }
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 /*@C
4499   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4500 
4501   Not collective
4502 
4503   Input Parameter:
4504 . A - The MPIAIJ matrix
4505 
4506   Output Parameters:
4507 + Ad - The local diagonal block as a SeqAIJ matrix
4508 . Ao - The local off-diagonal block as a SeqAIJ matrix
4509 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4510 
4511   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4512   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4513   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4514   local column numbers to global column numbers in the original matrix.
4515 
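
  A sketch of mapping the off-diagonal column indices of one local row back to global indices (row is assumed to be a valid local row index):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       ncols,j;

     PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap));
     PetscCall(MatGetRow(Ao,row,&ncols,&cols,NULL));
     for (j=0; j<ncols; j++) {
       PetscCall(PetscPrintf(PETSC_COMM_SELF,"global column %" PetscInt_FMT "\n",colmap[cols[j]]));
     }
     PetscCall(MatRestoreRow(Ao,row,&ncols,&cols,NULL));
.ve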
4516   Level: intermediate
4517 
4518 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4519 @*/
4520 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4521 {
4522   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4523   PetscBool      flg;
4524 
4525   PetscFunctionBegin;
4526   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4527   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4528   if (Ad)     *Ad     = a->A;
4529   if (Ao)     *Ao     = a->B;
4530   if (colmap) *colmap = a->garray;
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4535 {
4536   PetscInt       m,N,i,rstart,nnz,Ii;
4537   PetscInt       *indx;
4538   PetscScalar    *values;
4539   MatType        rootType;
4540 
4541   PetscFunctionBegin;
4542   PetscCall(MatGetSize(inmat,&m,&N));
4543   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4544     PetscInt       *dnz,*onz,sum,bs,cbs;
4545 
4546     if (n == PETSC_DECIDE) {
4547       PetscCall(PetscSplitOwnership(comm,&n,&N));
4548     }
4549     /* Check sum(n) = N */
4550     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4551     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4552 
4553     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4554     rstart -= m;
4555 
4556     MatPreallocateBegin(comm,m,n,dnz,onz);
4557     for (i=0; i<m; i++) {
4558       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4559       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4560       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4561     }
4562 
4563     PetscCall(MatCreate(comm,outmat));
4564     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4565     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4566     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4567     PetscCall(MatGetRootType_Private(inmat,&rootType));
4568     PetscCall(MatSetType(*outmat,rootType));
4569     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4570     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4571     MatPreallocateEnd(dnz,onz);
4572     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4573   }
4574 
4575   /* numeric phase */
4576   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4577   for (i=0; i<m; i++) {
4578     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4579     Ii   = i + rstart;
4580     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4581     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4582   }
4583   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4584   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4585   PetscFunctionReturn(0);
4586 }
4587 
4588 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4589 {
4590   PetscMPIInt       rank;
4591   PetscInt          m,N,i,rstart,nnz;
4592   size_t            len;
4593   const PetscInt    *indx;
4594   PetscViewer       out;
4595   char              *name;
4596   Mat               B;
4597   const PetscScalar *values;
4598 
4599   PetscFunctionBegin;
4600   PetscCall(MatGetLocalSize(A,&m,NULL));
4601   PetscCall(MatGetSize(A,NULL,&N));
4602   /* Should this be the type of the diagonal block of A? */
4603   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4604   PetscCall(MatSetSizes(B,m,N,m,N));
4605   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4606   PetscCall(MatSetType(B,MATSEQAIJ));
4607   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4608   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4609   for (i=0; i<m; i++) {
4610     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4611     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4612     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4613   }
4614   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4615   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4616 
4617   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4618   PetscCall(PetscStrlen(outfile,&len));
4619   PetscCall(PetscMalloc1(len+6,&name));
4620   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4621   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4622   PetscCall(PetscFree(name));
4623   PetscCall(MatView(B,out));
4624   PetscCall(PetscViewerDestroy(&out));
4625   PetscCall(MatDestroy(&B));
4626   PetscFunctionReturn(0);
4627 }
4628 
4629 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4630 {
4631   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4632 
4633   PetscFunctionBegin;
4634   if (!merge) PetscFunctionReturn(0);
4635   PetscCall(PetscFree(merge->id_r));
4636   PetscCall(PetscFree(merge->len_s));
4637   PetscCall(PetscFree(merge->len_r));
4638   PetscCall(PetscFree(merge->bi));
4639   PetscCall(PetscFree(merge->bj));
4640   PetscCall(PetscFree(merge->buf_ri[0]));
4641   PetscCall(PetscFree(merge->buf_ri));
4642   PetscCall(PetscFree(merge->buf_rj[0]));
4643   PetscCall(PetscFree(merge->buf_rj));
4644   PetscCall(PetscFree(merge->coi));
4645   PetscCall(PetscFree(merge->coj));
4646   PetscCall(PetscFree(merge->owners_co));
4647   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4648   PetscCall(PetscFree(merge));
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 #include <../src/mat/utils/freespace.h>
4653 #include <petscbt.h>
4654 
4655 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4656 {
4657   MPI_Comm            comm;
4658   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4659   PetscMPIInt         size,rank,taga,*len_s;
4660   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4661   PetscInt            proc,m;
4662   PetscInt            **buf_ri,**buf_rj;
4663   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4664   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4665   MPI_Request         *s_waits,*r_waits;
4666   MPI_Status          *status;
4667   const MatScalar     *aa,*a_a;
4668   MatScalar           **abuf_r,*ba_i;
4669   Mat_Merge_SeqsToMPI *merge;
4670   PetscContainer      container;
4671 
4672   PetscFunctionBegin;
4673   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
4674   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
4675 
4676   PetscCallMPI(MPI_Comm_size(comm,&size));
4677   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4678 
4679   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
4680   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4681   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
4682   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4683   aa   = a_a;
4684 
4685   bi     = merge->bi;
4686   bj     = merge->bj;
4687   buf_ri = merge->buf_ri;
4688   buf_rj = merge->buf_rj;
4689 
4690   PetscCall(PetscMalloc1(size,&status));
4691   owners = merge->rowmap->range;
4692   len_s  = merge->len_s;
4693 
4694   /* send and recv matrix values */
4695   /*-----------------------------*/
4696   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
4697   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
4698 
4699   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
4700   for (proc=0,k=0; proc<size; proc++) {
4701     if (!len_s[proc]) continue;
4702     i    = owners[proc];
4703     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
4704     k++;
4705   }
4706 
4707   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
4708   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
4709   PetscCall(PetscFree(status));
4710 
4711   PetscCall(PetscFree(s_waits));
4712   PetscCall(PetscFree(r_waits));
4713 
4714   /* insert mat values of mpimat */
4715   /*----------------------------*/
4716   PetscCall(PetscMalloc1(N,&ba_i));
4717   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4718 
4719   for (k=0; k<merge->nrecv; k++) {
4720     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4721     nrows       = *(buf_ri_k[k]);
4722     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4723     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4724   }
4725 
4726   /* set values of ba */
4727   m    = merge->rowmap->n;
4728   for (i=0; i<m; i++) {
4729     arow = owners[rank] + i;
4730     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4731     bnzi = bi[i+1] - bi[i];
4732     PetscCall(PetscArrayzero(ba_i,bnzi));
4733 
4734     /* add local non-zero vals of this proc's seqmat into ba */
4735     anzi   = ai[arow+1] - ai[arow];
4736     aj     = a->j + ai[arow];
4737     aa     = a_a + ai[arow];
4738     nextaj = 0;
4739     for (j=0; nextaj<anzi; j++) {
4740       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4741         ba_i[j] += aa[nextaj++];
4742       }
4743     }
4744 
4745     /* add received vals into ba */
4746     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4747       /* i-th row */
4748       if (i == *nextrow[k]) {
4749         anzi   = *(nextai[k]+1) - *nextai[k];
4750         aj     = buf_rj[k] + *(nextai[k]);
4751         aa     = abuf_r[k] + *(nextai[k]);
4752         nextaj = 0;
4753         for (j=0; nextaj<anzi; j++) {
4754           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4755             ba_i[j] += aa[nextaj++];
4756           }
4757         }
4758         nextrow[k]++; nextai[k]++;
4759       }
4760     }
4761     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
4762   }
4763   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
4764   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
4765   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
4766 
4767   PetscCall(PetscFree(abuf_r[0]));
4768   PetscCall(PetscFree(abuf_r));
4769   PetscCall(PetscFree(ba_i));
4770   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4771   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
4772   PetscFunctionReturn(0);
4773 }
4774 
4775 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4776 {
4777   Mat                 B_mpi;
4778   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4779   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4780   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4781   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4782   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4783   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4784   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4785   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4786   MPI_Status          *status;
4787   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4788   PetscBT             lnkbt;
4789   Mat_Merge_SeqsToMPI *merge;
4790   PetscContainer      container;
4791 
4792   PetscFunctionBegin;
4793   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
4794 
4795   /* make sure it is a PETSc comm */
4796   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
4797   PetscCallMPI(MPI_Comm_size(comm,&size));
4798   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4799 
4800   PetscCall(PetscNew(&merge));
4801   PetscCall(PetscMalloc1(size,&status));
4802 
4803   /* determine row ownership */
4804   /*---------------------------------------------------------*/
4805   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
4806   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
4807   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
4808   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
4809   PetscCall(PetscLayoutSetUp(merge->rowmap));
4810   PetscCall(PetscMalloc1(size,&len_si));
4811   PetscCall(PetscMalloc1(size,&merge->len_s));
4812 
4813   m      = merge->rowmap->n;
4814   owners = merge->rowmap->range;
4815 
4816   /* determine the number of messages to send, their lengths */
4817   /*---------------------------------------------------------*/
4818   len_s = merge->len_s;
4819 
4820   len          = 0; /* length of buf_si[] */
4821   merge->nsend = 0;
4822   for (proc=0; proc<size; proc++) {
4823     len_si[proc] = 0;
4824     if (proc == rank) {
4825       len_s[proc] = 0;
4826     } else {
4827       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4828       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4829     }
4830     if (len_s[proc]) {
4831       merge->nsend++;
4832       nrows = 0;
4833       for (i=owners[proc]; i<owners[proc+1]; i++) {
4834         if (ai[i+1] > ai[i]) nrows++;
4835       }
4836       len_si[proc] = 2*(nrows+1);
4837       len         += len_si[proc];
4838     }
4839   }
4840 
4841   /* determine the number and length of messages to receive for ij-structure */
4842   /*-------------------------------------------------------------------------*/
4843   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
4844   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4845 
4846   /* post the Irecv of j-structure */
4847   /*-------------------------------*/
4848   PetscCall(PetscCommGetNewTag(comm,&tagj));
4849   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
4850 
4851   /* post the Isend of j-structure */
4852   /*--------------------------------*/
4853   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
4854 
4855   for (proc=0, k=0; proc<size; proc++) {
4856     if (!len_s[proc]) continue;
4857     i    = owners[proc];
4858     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
4859     k++;
4860   }
4861 
4862   /* receives and sends of j-structure are complete */
4863   /*------------------------------------------------*/
4864   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
4865   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
4866 
4867   /* send and recv i-structure */
4868   /*---------------------------*/
4869   PetscCall(PetscCommGetNewTag(comm,&tagi));
4870   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
4871 
4872   PetscCall(PetscMalloc1(len+1,&buf_s));
4873   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4874   for (proc=0,k=0; proc<size; proc++) {
4875     if (!len_s[proc]) continue;
4876     /* form outgoing message for i-structure:
4877          buf_si[0]:                 nrows to be sent
4878                [1:nrows]:           row index (global)
4879                [nrows+1:2*nrows+1]: i-structure index
4880     */
4881     /*-------------------------------------------*/
4882     nrows       = len_si[proc]/2 - 1;
4883     buf_si_i    = buf_si + nrows+1;
4884     buf_si[0]   = nrows;
4885     buf_si_i[0] = 0;
4886     nrows       = 0;
4887     for (i=owners[proc]; i<owners[proc+1]; i++) {
4888       anzi = ai[i+1] - ai[i];
4889       if (anzi) {
4890         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4891         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4892         nrows++;
4893       }
4894     }
4895     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
4896     k++;
4897     buf_si += len_si[proc];
4898   }
4899 
4900   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
4901   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
4902 
4903   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
4904   for (i=0; i<merge->nrecv; i++) {
4905     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
4906   }
4907 
4908   PetscCall(PetscFree(len_si));
4909   PetscCall(PetscFree(len_ri));
4910   PetscCall(PetscFree(rj_waits));
4911   PetscCall(PetscFree2(si_waits,sj_waits));
4912   PetscCall(PetscFree(ri_waits));
4913   PetscCall(PetscFree(buf_s));
4914   PetscCall(PetscFree(status));
4915 
4916   /* compute a local seq matrix in each processor */
4917   /*----------------------------------------------*/
4918   /* allocate bi array and free space for accumulating nonzero column info */
4919   PetscCall(PetscMalloc1(m+1,&bi));
4920   bi[0] = 0;
4921 
4922   /* create and initialize a linked list */
4923   nlnk = N+1;
4924   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
4925 
4926   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4927   len  = ai[owners[rank+1]] - ai[owners[rank]];
4928   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
4929 
4930   current_space = free_space;
4931 
4932   /* determine symbolic info for each local row */
4933   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4934 
4935   for (k=0; k<merge->nrecv; k++) {
4936     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4937     nrows       = *buf_ri_k[k];
4938     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4939     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4940   }
4941 
4942   MatPreallocateBegin(comm,m,n,dnz,onz);
4943   len  = 0;
4944   for (i=0; i<m; i++) {
4945     bnzi = 0;
4946     /* add local non-zero cols of this proc's seqmat into lnk */
4947     arow  = owners[rank] + i;
4948     anzi  = ai[arow+1] - ai[arow];
4949     aj    = a->j + ai[arow];
4950     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4951     bnzi += nlnk;
4952     /* add received col data into lnk */
4953     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4954       if (i == *nextrow[k]) { /* i-th row */
4955         anzi  = *(nextai[k]+1) - *nextai[k];
4956         aj    = buf_rj[k] + *nextai[k];
4957         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4958         bnzi += nlnk;
4959         nextrow[k]++; nextai[k]++;
4960       }
4961     }
4962     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4963 
4964     /* if free space is not available, make more free space */
4965     if (current_space->local_remaining<bnzi) {
4966       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
4967       nspacedouble++;
4968     }
4969     /* copy data into free space, then initialize lnk */
4970     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
4971     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4972 
4973     current_space->array           += bnzi;
4974     current_space->local_used      += bnzi;
4975     current_space->local_remaining -= bnzi;
4976 
4977     bi[i+1] = bi[i] + bnzi;
4978   }
4979 
4980   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4981 
4982   PetscCall(PetscMalloc1(bi[m]+1,&bj));
4983   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
4984   PetscCall(PetscLLDestroy(lnk,lnkbt));
4985 
4986   /* create symbolic parallel matrix B_mpi */
4987   /*---------------------------------------*/
4988   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
4989   PetscCall(MatCreate(comm,&B_mpi));
4990   if (n==PETSC_DECIDE) {
4991     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
4992   } else {
4993     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4994   }
4995   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
4996   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
4997   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4998   MatPreallocateEnd(dnz,onz);
4999   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
5000 
5001   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5002   B_mpi->assembled  = PETSC_FALSE;
5003   merge->bi         = bi;
5004   merge->bj         = bj;
5005   merge->buf_ri     = buf_ri;
5006   merge->buf_rj     = buf_rj;
5007   merge->coi        = NULL;
5008   merge->coj        = NULL;
5009   merge->owners_co  = NULL;
5010 
5011   PetscCall(PetscCommDestroy(&comm));
5012 
5013   /* attach the supporting struct to B_mpi for reuse */
5014   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
5015   PetscCall(PetscContainerSetPointer(container,merge));
5016   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
5017   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
5018   PetscCall(PetscContainerDestroy(&container));
5019   *mpimat = B_mpi;
5020 
5021   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
5022   PetscFunctionReturn(0);
5023 }
5024 
5025 /*@C
5026       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5027                  matrices from each processor
5028 
5029     Collective
5030 
5031    Input Parameters:
5032 +    comm - the communicator the parallel matrix will live on
5033 .    seqmat - the input sequential matrix on each process
5034 .    m - number of local rows (or PETSC_DECIDE)
5035 .    n - number of local columns (or PETSC_DECIDE)
5036 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5037 
5038    Output Parameter:
5039 .    mpimat - the parallel matrix generated
5040 
5041     Level: advanced
5042 
5043    Notes:
5044      The dimensions of the sequential matrix in each processor MUST be the same.
5045      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5046      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
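
   A minimal sketch (assuming each process has already assembled its sequential contribution Aseq, all of the same global size):

.vb
     Mat C;
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C));
     /* later, after the values (but not the nonzero pattern) of Aseq change */
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C));
.ve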
5047 @*/
5048 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5049 {
5050   PetscMPIInt    size;
5051 
5052   PetscFunctionBegin;
5053   PetscCallMPI(MPI_Comm_size(comm,&size));
5054   if (size == 1) {
5055     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5056     if (scall == MAT_INITIAL_MATRIX) {
5057       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
5058     } else {
5059       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
5060     }
5061     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5062     PetscFunctionReturn(0);
5063   }
5064   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5065   if (scall == MAT_INITIAL_MATRIX) {
5066     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
5067   }
5068   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
5069   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5070   PetscFunctionReturn(0);
5071 }
5072 
5073 /*@
5074      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5075           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5076           with MatGetSize()
5077 
5078     Not Collective
5079 
5080    Input Parameter:
5081 .    A - the matrix
5083 
5084    Output Parameter:
5085 .    A_loc - the local sequential matrix generated
5086 
5087     Level: developer
5088 
5089    Notes:
5090      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5091 
5092      Destroy the matrix with MatDestroy()
5093 
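   A sketch (assuming A is either a MATSEQAIJ or a MATMPIAIJ matrix):

.vb
     Mat A_loc;
     PetscCall(MatAIJGetLocalMat(A,&A_loc));
     /* ... use A_loc as a sequential matrix ... */
     PetscCall(MatDestroy(&A_loc));
.ve
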
5094 .seealso: `MatMPIAIJGetLocalMat()`
5095 
5096 @*/
5097 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5098 {
5099   PetscBool      mpi;
5100 
5101   PetscFunctionBegin;
5102   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5103   if (mpi) {
5104     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5105   } else {
5106     *A_loc = A;
5107     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5108   }
5109   PetscFunctionReturn(0);
5110 }
5111 
5112 /*@
5113      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5114           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5115           with MatGetSize()
5116 
5117     Not Collective
5118 
5119    Input Parameters:
5120 +    A - the matrix
5121 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5122 
5123    Output Parameter:
5124 .    A_loc - the local sequential matrix generated
5125 
5126     Level: developer
5127 
5128    Notes:
5129      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5130 
5131      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5132      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5133      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5134      modify the values of the returned A_loc.
5135 
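   A sketch of the reuse pattern described above (assuming the values of A change between calls while its nonzero pattern stays fixed):

.vb
     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc));
     /* ... the values of A change ... */
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc));
     PetscCall(MatDestroy(&A_loc));
.ve
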
5136 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5137 @*/
5138 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5139 {
5140   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5141   Mat_SeqAIJ        *mat,*a,*b;
5142   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5143   const PetscScalar *aa,*ba,*aav,*bav;
5144   PetscScalar       *ca,*cam;
5145   PetscMPIInt       size;
5146   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5147   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5148   PetscBool         match;
5149 
5150   PetscFunctionBegin;
5151   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
5152   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5153   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5154   if (size == 1) {
5155     if (scall == MAT_INITIAL_MATRIX) {
5156       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5157       *A_loc = mpimat->A;
5158     } else if (scall == MAT_REUSE_MATRIX) {
5159       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
5160     }
5161     PetscFunctionReturn(0);
5162   }
5163 
5164   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5165   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5166   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5167   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5168   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
5169   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5170   aa   = aav;
5171   ba   = bav;
5172   if (scall == MAT_INITIAL_MATRIX) {
5173     PetscCall(PetscMalloc1(1+am,&ci));
5174     ci[0] = 0;
5175     for (i=0; i<am; i++) {
5176       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5177     }
5178     PetscCall(PetscMalloc1(1+ci[am],&cj));
5179     PetscCall(PetscMalloc1(1+ci[am],&ca));
5180     k    = 0;
5181     for (i=0; i<am; i++) {
5182       ncols_o = bi[i+1] - bi[i];
5183       ncols_d = ai[i+1] - ai[i];
5184       /* off-diagonal portion of A */
5185       for (jo=0; jo<ncols_o; jo++) {
5186         col = cmap[*bj];
5187         if (col >= cstart) break;
5188         cj[k]   = col; bj++;
5189         ca[k++] = *ba++;
5190       }
5191       /* diagonal portion of A */
5192       for (j=0; j<ncols_d; j++) {
5193         cj[k]   = cstart + *aj++;
5194         ca[k++] = *aa++;
5195       }
5196       /* off-diagonal portion of A */
5197       for (j=jo; j<ncols_o; j++) {
5198         cj[k]   = cmap[*bj++];
5199         ca[k++] = *ba++;
5200       }
5201     }
5202     /* put together the new matrix */
5203     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5204     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5205     /* Since these are PETSc arrays, change flags to free them as necessary. */
5206     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5207     mat->free_a  = PETSC_TRUE;
5208     mat->free_ij = PETSC_TRUE;
5209     mat->nonew   = 0;
5210   } else if (scall == MAT_REUSE_MATRIX) {
5211     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5212     ci   = mat->i;
5213     cj   = mat->j;
5214     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
5215     for (i=0; i<am; i++) {
5216       /* off-diagonal portion of A */
5217       ncols_o = bi[i+1] - bi[i];
5218       for (jo=0; jo<ncols_o; jo++) {
5219         col = cmap[*bj];
5220         if (col >= cstart) break;
5221         *cam++ = *ba++; bj++;
5222       }
5223       /* diagonal portion of A */
5224       ncols_d = ai[i+1] - ai[i];
5225       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5226       /* off-diagonal portion of A */
5227       for (j=jo; j<ncols_o; j++) {
5228         *cam++ = *ba++; bj++;
5229       }
5230     }
5231     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
5232   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5233   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
5234   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
5235   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5236   PetscFunctionReturn(0);
5237 }
5238 
5239 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts.
5242 
5243     Not Collective
5244 
5245    Input Parameters:
5246 +    A - the matrix
5247 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5248 
5249    Output Parameters:
5250 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5251 -    A_loc - the local sequential matrix generated
5252 
5253     Level: developer
5254 
5255    Notes:
     This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering).
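
   Example usage (a minimal sketch; glob maps the columns of the merged local matrix back to global columns of A):
.vb
   Mat Aloc;
   IS  glob;
   PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&Aloc));
   /* local column c of Aloc corresponds to the global column given by the c-th entry of glob */
   PetscCall(ISDestroy(&glob));
   PetscCall(MatDestroy(&Aloc));
.ve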
5257 
5258 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5259 
5260 @*/
5261 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5262 {
5263   Mat            Ao,Ad;
5264   const PetscInt *cmap;
5265   PetscMPIInt    size;
5266   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5267 
5268   PetscFunctionBegin;
5269   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5270   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5271   if (size == 1) {
5272     if (scall == MAT_INITIAL_MATRIX) {
5273       PetscCall(PetscObjectReference((PetscObject)Ad));
5274       *A_loc = Ad;
5275     } else if (scall == MAT_REUSE_MATRIX) {
5276       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5277     }
5278     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5279     PetscFunctionReturn(0);
5280   }
5281   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5282   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5283   if (f) {
5284     PetscCall((*f)(A,scall,glob,A_loc));
5285   } else {
5286     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5287     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5288     Mat_SeqAIJ        *c;
5289     PetscInt          *ai = a->i, *aj = a->j;
5290     PetscInt          *bi = b->i, *bj = b->j;
5291     PetscInt          *ci,*cj;
5292     const PetscScalar *aa,*ba;
5293     PetscScalar       *ca;
5294     PetscInt          i,j,am,dn,on;
5295 
5296     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5297     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5298     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5299     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5300     if (scall == MAT_INITIAL_MATRIX) {
5301       PetscInt k;
5302       PetscCall(PetscMalloc1(1+am,&ci));
5303       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5304       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5305       ci[0] = 0;
5306       for (i=0,k=0; i<am; i++) {
5307         const PetscInt ncols_o = bi[i+1] - bi[i];
5308         const PetscInt ncols_d = ai[i+1] - ai[i];
5309         ci[i+1] = ci[i] + ncols_o + ncols_d;
5310         /* diagonal portion of A */
5311         for (j=0; j<ncols_d; j++,k++) {
5312           cj[k] = *aj++;
5313           ca[k] = *aa++;
5314         }
5315         /* off-diagonal portion of A */
5316         for (j=0; j<ncols_o; j++,k++) {
5317           cj[k] = dn + *bj++;
5318           ca[k] = *ba++;
5319         }
5320       }
5321       /* put together the new matrix */
5322       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5323       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5324       /* Since these are PETSc arrays, change flags to free them as necessary. */
5325       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5326       c->free_a  = PETSC_TRUE;
5327       c->free_ij = PETSC_TRUE;
5328       c->nonew   = 0;
5329       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5330     } else if (scall == MAT_REUSE_MATRIX) {
5331       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5332       for (i=0; i<am; i++) {
5333         const PetscInt ncols_d = ai[i+1] - ai[i];
5334         const PetscInt ncols_o = bi[i+1] - bi[i];
5335         /* diagonal portion of A */
5336         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5337         /* off-diagonal portion of A */
5338         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5339       }
5340       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5341     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5342     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5344     if (glob) {
5345       PetscInt cst, *gidx;
5346 
5347       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5348       PetscCall(PetscMalloc1(dn+on,&gidx));
5349       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5350       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5351       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5352     }
5353   }
5354   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5355   PetscFunctionReturn(0);
5356 }
5357 
5358 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5360 
5361     Not Collective
5362 
5363    Input Parameters:
5364 +    A - the matrix
5365 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5366 -    row, col - index sets of rows and columns to extract (or NULL)
5367 
5368    Output Parameter:
5369 .    A_loc - the local sequential matrix generated
5370 
5371     Level: developer
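
   Example usage (a minimal sketch; passing NULL for row and col extracts all local rows and all nonzero columns):
.vb
   Mat Aloc;
   PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc));
   /* ... */
   PetscCall(MatDestroy(&Aloc));
.ve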
5372 
5373 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5374 
5375 @*/
5376 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5377 {
5378   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5379   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5380   IS             isrowa,iscola;
5381   Mat            *aloc;
5382   PetscBool      match;
5383 
5384   PetscFunctionBegin;
5385   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5386   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5387   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5388   if (!row) {
5389     start = A->rmap->rstart; end = A->rmap->rend;
5390     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5391   } else {
5392     isrowa = *row;
5393   }
5394   if (!col) {
5395     start = A->cmap->rstart;
5396     cmap  = a->garray;
5397     nzA   = a->A->cmap->n;
5398     nzB   = a->B->cmap->n;
5399     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5400     ncols = 0;
5401     for (i=0; i<nzB; i++) {
5402       if (cmap[i] < start) idx[ncols++] = cmap[i];
5403       else break;
5404     }
5405     imark = i;
5406     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5407     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5408     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5409   } else {
5410     iscola = *col;
5411   }
5412   if (scall != MAT_INITIAL_MATRIX) {
5413     PetscCall(PetscMalloc1(1,&aloc));
5414     aloc[0] = *A_loc;
5415   }
5416   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5417   if (!col) { /* attach global id of condensed columns */
5418     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5419   }
5420   *A_loc = aloc[0];
5421   PetscCall(PetscFree(aloc));
5422   if (!row) {
5423     PetscCall(ISDestroy(&isrowa));
5424   }
5425   if (!col) {
5426     PetscCall(ISDestroy(&iscola));
5427   }
5428   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5429   PetscFunctionReturn(0);
5430 }
5431 
5432 /*
 * Create a sequential AIJ matrix based on row indices; once a row is matched, all of its columns are extracted.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
5436  * */
5437 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5438 {
5439   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5440   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5441   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5442   PetscMPIInt              owner;
5443   PetscSFNode              *iremote,*oiremote;
5444   const PetscInt           *lrowindices;
5445   PetscSF                  sf,osf;
5446   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5447   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5448   MPI_Comm                 comm;
5449   ISLocalToGlobalMapping   mapping;
5450   const PetscScalar        *pd_a,*po_a;
5451 
5452   PetscFunctionBegin;
5453   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
5454   /* plocalsize is the number of roots
5455    * nrows is the number of leaves
5456    * */
5457   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
5458   PetscCall(ISGetLocalSize(rows,&nrows));
5459   PetscCall(PetscCalloc1(nrows,&iremote));
5460   PetscCall(ISGetIndices(rows,&lrowindices));
5461   for (i=0;i<nrows;i++) {
5462     /* Find a remote index and an owner for a row
5463      * The row could be local or remote
5464      * */
5465     owner = 0;
5466     lidx  = 0;
5467     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
5468     iremote[i].index = lidx;
5469     iremote[i].rank  = owner;
5470   }
5471   /* Create SF to communicate how many nonzero columns for each row */
5472   PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
5474    * offsets
5475    * */
5476   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5477   PetscCall(PetscSFSetFromOptions(sf));
5478   PetscCall(PetscSFSetUp(sf));
5479 
5480   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
5481   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
5482   PetscCall(PetscCalloc1(nrows,&pnnz));
5483   roffsets[0] = 0;
5484   roffsets[1] = 0;
5485   for (i=0;i<plocalsize;i++) {
5486     /* diag */
5487     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5488     /* off diag */
5489     nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
5491     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5492     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5493   }
5494   PetscCall(PetscCalloc1(2*nrows,&nlcols));
5495   PetscCall(PetscCalloc1(2*nrows,&loffsets));
5496   /* 'r' means root, and 'l' means leaf */
5497   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5498   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5499   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5500   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5501   PetscCall(PetscSFDestroy(&sf));
5502   PetscCall(PetscFree(roffsets));
5503   PetscCall(PetscFree(nrcols));
5504   dntotalcols = 0;
5505   ontotalcols = 0;
5506   ncol = 0;
5507   for (i=0;i<nrows;i++) {
5508     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5509     ncol = PetscMax(pnnz[i],ncol);
5510     /* diag */
5511     dntotalcols += nlcols[i*2+0];
5512     /* off diag */
5513     ontotalcols += nlcols[i*2+1];
5514   }
  /* We do not need to figure out the exact number of columns
5516    * since all the calculations will be done by going through the raw data
5517    * */
5518   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
5519   PetscCall(MatSetUp(*P_oth));
5520   PetscCall(PetscFree(pnnz));
5521   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5522   /* diag */
5523   PetscCall(PetscCalloc1(dntotalcols,&iremote));
5524   /* off diag */
5525   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
5526   /* diag */
5527   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
5528   /* off diag */
5529   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
5530   dntotalcols = 0;
5531   ontotalcols = 0;
5532   ntotalcols  = 0;
5533   for (i=0;i<nrows;i++) {
5534     owner = 0;
5535     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
5536     /* Set iremote for diag matrix */
5537     for (j=0;j<nlcols[i*2+0];j++) {
5538       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5539       iremote[dntotalcols].rank    = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5541       ilocal[dntotalcols++]        = ntotalcols++;
5542     }
5543     /* off diag */
5544     for (j=0;j<nlcols[i*2+1];j++) {
5545       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5546       oiremote[ontotalcols].rank    = owner;
5547       oilocal[ontotalcols++]        = ntotalcols++;
5548     }
5549   }
5550   PetscCall(ISRestoreIndices(rows,&lrowindices));
5551   PetscCall(PetscFree(loffsets));
5552   PetscCall(PetscFree(nlcols));
5553   PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth serves as leaves
5555    * Diag matrix
5556    * */
5557   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5558   PetscCall(PetscSFSetFromOptions(sf));
5559   PetscCall(PetscSFSetUp(sf));
5560 
5561   PetscCall(PetscSFCreate(comm,&osf));
5562   /* Off diag */
5563   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
5564   PetscCall(PetscSFSetFromOptions(osf));
5565   PetscCall(PetscSFSetUp(osf));
5566   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5567   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data to save memory */
5569   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5570   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5571   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
5572   /* Convert to global indices for diag matrix */
5573   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5574   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
5576   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
5577   /* Use memory scalable approach */
5578   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
5579   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
5580   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5581   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5582   /* Convert back to local indices */
5583   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5584   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5585   nout = 0;
5586   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
5587   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5588   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5589   /* Exchange values */
5590   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5591   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5592   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5593   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5594   /* Stop PETSc from shrinking memory */
5595   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5596   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
5597   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
5599   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
5600   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
5601   PetscCall(PetscSFDestroy(&sf));
5602   PetscCall(PetscSFDestroy(&osf));
5603   PetscFunctionReturn(0);
5604 }
5605 
5606 /*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5608  * This supports MPIAIJ and MAIJ
5609  * */
5610 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5611 {
5612   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5613   Mat_SeqAIJ            *p_oth;
5614   IS                    rows,map;
5615   PetscHMapI            hamp;
5616   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5617   MPI_Comm              comm;
5618   PetscSF               sf,osf;
5619   PetscBool             has;
5620 
5621   PetscFunctionBegin;
5622   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5623   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
5624   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5625    *  and then create a submatrix (that often is an overlapping matrix)
5626    * */
5627   if (reuse == MAT_INITIAL_MATRIX) {
5628     /* Use a hash table to figure out unique keys */
5629     PetscCall(PetscHMapICreate(&hamp));
5630     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
5631     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5632     count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
5634     for (i=0;i<a->B->cmap->n;i++) {
5635       key  = a->garray[i]/dof;
5636       PetscCall(PetscHMapIHas(hamp,key,&has));
5637       if (!has) {
5638         mapping[i] = count;
5639         PetscCall(PetscHMapISet(hamp,key,count++));
5640       } else {
        /* The current 'i' maps to the same key as the previous one */
5642         mapping[i] = count-1;
5643       }
5644     }
5645     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
5646     PetscCall(PetscHMapIGetSize(hamp,&htsize));
5647     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5648     PetscCall(PetscCalloc1(htsize,&rowindices));
5649     off = 0;
5650     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
5651     PetscCall(PetscHMapIDestroy(&hamp));
5652     PetscCall(PetscSortInt(htsize,rowindices));
5653     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but the user wants to recreate it */
5655     PetscCall(MatDestroy(P_oth));
5656     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
5657     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
5658     PetscCall(ISDestroy(&map));
5659     PetscCall(ISDestroy(&rows));
5660   } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
5663      */
5664     const PetscScalar *pd_a,*po_a;
5665 
5666     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
5667     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
5668     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5669     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5670     /* Update values in place */
5671     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5672     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5673     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5674     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5675     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5676     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5677     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5678     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5679   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5680   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
5681   PetscFunctionReturn(0);
5682 }
5683 
5684 /*@C
  MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5686 
5687   Collective on Mat
5688 
5689   Input Parameters:
5690 + A - the first matrix in mpiaij format
5691 . B - the second matrix in mpiaij format
5692 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5693 
5694   Output Parameters:
+ rowb - On input, index set of rows of B to extract (or NULL); modified on output
. colb - On input, index set of columns of B to extract (or NULL); modified on output
5697 - B_seq - the sequential matrix generated
5698 
5699   Level: developer
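
  Example usage (a minimal sketch, assuming the caller destroys the returned index sets and matrix):
.vb
  IS  rowb = NULL,colb = NULL;
  Mat Bseq;
  PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq));
  /* ... the numerical values of B change ... */
  PetscCall(MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&Bseq));
.ve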
5700 
5701 @*/
5702 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5703 {
5704   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5705   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5706   IS             isrowb,iscolb;
5707   Mat            *bseq=NULL;
5708 
5709   PetscFunctionBegin;
5710   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5711     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5712   }
5713   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5714 
5715   if (scall == MAT_INITIAL_MATRIX) {
5716     start = A->cmap->rstart;
5717     cmap  = a->garray;
5718     nzA   = a->A->cmap->n;
5719     nzB   = a->B->cmap->n;
5720     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5721     ncols = 0;
5722     for (i=0; i<nzB; i++) {  /* row < local row index */
5723       if (cmap[i] < start) idx[ncols++] = cmap[i];
5724       else break;
5725     }
5726     imark = i;
5727     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5728     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5729     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5730     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5731   } else {
5732     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5733     isrowb  = *rowb; iscolb = *colb;
5734     PetscCall(PetscMalloc1(1,&bseq));
5735     bseq[0] = *B_seq;
5736   }
5737   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5738   *B_seq = bseq[0];
5739   PetscCall(PetscFree(bseq));
5740   if (!rowb) {
5741     PetscCall(ISDestroy(&isrowb));
5742   } else {
5743     *rowb = isrowb;
5744   }
5745   if (!colb) {
5746     PetscCall(ISDestroy(&iscolb));
5747   } else {
5748     *colb = iscolb;
5749   }
5750   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5751   PetscFunctionReturn(0);
5752 }
5753 
5754 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local A
5757 
5758     Collective on Mat
5759 
5760    Input Parameters:
5761 +    A,B - the matrices in mpiaij format
5762 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5763 
   Output Parameters:
5765 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5766 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5767 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5768 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5769 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5772 
5773     Level: developer
5774 
5775 */
5776 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5777 {
5778   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5779   Mat_SeqAIJ             *b_oth;
5780   VecScatter             ctx;
5781   MPI_Comm               comm;
5782   const PetscMPIInt      *rprocs,*sprocs;
5783   const PetscInt         *srow,*rstarts,*sstarts;
5784   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5785   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5786   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5787   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5788   PetscMPIInt            size,tag,rank,nreqs;
5789 
5790   PetscFunctionBegin;
5791   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5792   PetscCallMPI(MPI_Comm_size(comm,&size));
5793 
5794   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5795     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5796   }
5797   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5798   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5799 
5800   if (size == 1) {
5801     startsj_s = NULL;
5802     bufa_ptr  = NULL;
5803     *B_oth    = NULL;
5804     PetscFunctionReturn(0);
5805   }
5806 
5807   ctx = a->Mvctx;
5808   tag = ((PetscObject)ctx)->tag;
5809 
5810   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5811   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5812   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5813   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5814   PetscCall(PetscMalloc1(nreqs,&reqs));
5815   rwaits = reqs;
5816   swaits = reqs + nrecvs;
5817 
5818   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5819   if (scall == MAT_INITIAL_MATRIX) {
5820     /* i-array */
5821     /*---------*/
5822     /*  post receives */
5823     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5824     for (i=0; i<nrecvs; i++) {
5825       rowlen = rvalues + rstarts[i]*rbs;
5826       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5827       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5828     }
5829 
5830     /* pack the outgoing message */
5831     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5832 
5833     sstartsj[0] = 0;
5834     rstartsj[0] = 0;
5835     len         = 0; /* total length of j or a array to be sent */
5836     if (nsends) {
5837       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5838       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5839     }
5840     for (i=0; i<nsends; i++) {
5841       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5842       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5843       for (j=0; j<nrows; j++) {
5844         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5845         for (l=0; l<sbs; l++) {
5846           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5847 
5848           rowlen[j*sbs+l] = ncols;
5849 
5850           len += ncols;
5851           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5852         }
5853         k++;
5854       }
5855       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5856 
5857       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5858     }
5859     /* recvs and sends of i-array are completed */
5860     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5861     PetscCall(PetscFree(svalues));
5862 
5863     /* allocate buffers for sending j and a arrays */
5864     PetscCall(PetscMalloc1(len+1,&bufj));
5865     PetscCall(PetscMalloc1(len+1,&bufa));
5866 
5867     /* create i-array of B_oth */
5868     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5869 
5870     b_othi[0] = 0;
5871     len       = 0; /* total length of j or a array to be received */
5872     k         = 0;
5873     for (i=0; i<nrecvs; i++) {
5874       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5875       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5876       for (j=0; j<nrows; j++) {
5877         b_othi[k+1] = b_othi[k] + rowlen[j];
5878         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5879         k++;
5880       }
5881       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5882     }
5883     PetscCall(PetscFree(rvalues));
5884 
5885     /* allocate space for j and a arrays of B_oth */
5886     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5887     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5888 
5889     /* j-array */
5890     /*---------*/
5891     /*  post receives of j-array */
5892     for (i=0; i<nrecvs; i++) {
5893       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5894       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5895     }
5896 
5897     /* pack the outgoing message j-array */
5898     if (nsends) k = sstarts[0];
5899     for (i=0; i<nsends; i++) {
5900       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5901       bufJ  = bufj+sstartsj[i];
5902       for (j=0; j<nrows; j++) {
5903         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5904         for (ll=0; ll<sbs; ll++) {
5905           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5906           for (l=0; l<ncols; l++) {
5907             *bufJ++ = cols[l];
5908           }
5909           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5910         }
5911       }
5912       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5913     }
5914 
5915     /* recvs and sends of j-array are completed */
5916     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5917   } else if (scall == MAT_REUSE_MATRIX) {
5918     sstartsj = *startsj_s;
5919     rstartsj = *startsj_r;
5920     bufa     = *bufa_ptr;
5921     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5922     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5923   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5924 
5925   /* a-array */
5926   /*---------*/
5927   /*  post receives of a-array */
5928   for (i=0; i<nrecvs; i++) {
5929     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5930     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5931   }
5932 
5933   /* pack the outgoing message a-array */
5934   if (nsends) k = sstarts[0];
5935   for (i=0; i<nsends; i++) {
5936     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5937     bufA  = bufa+sstartsj[i];
5938     for (j=0; j<nrows; j++) {
5939       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5940       for (ll=0; ll<sbs; ll++) {
5941         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5942         for (l=0; l<ncols; l++) {
5943           *bufA++ = vals[l];
5944         }
5945         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5946       }
5947     }
5948     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5949   }
5950   /* recvs and sends of a-array are completed */
5951   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5952   PetscCall(PetscFree(reqs));
5953 
5954   if (scall == MAT_INITIAL_MATRIX) {
5955     /* put together the new matrix */
5956     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5957 
5958     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5959     /* Since these are PETSc arrays, change flags to free them as necessary. */
5960     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5961     b_oth->free_a  = PETSC_TRUE;
5962     b_oth->free_ij = PETSC_TRUE;
5963     b_oth->nonew   = 0;
5964 
5965     PetscCall(PetscFree(bufj));
5966     if (!startsj_s || !bufa_ptr) {
5967       PetscCall(PetscFree2(sstartsj,rstartsj));
5968       PetscCall(PetscFree(bufa_ptr));
5969     } else {
5970       *startsj_s = sstartsj;
5971       *startsj_r = rstartsj;
5972       *bufa_ptr  = bufa;
5973     }
5974   } else if (scall == MAT_REUSE_MATRIX) {
5975     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5976   }
5977 
5978   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5979   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5980   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5981   PetscFunctionReturn(0);
5982 }
5983 
5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5987 #if defined(PETSC_HAVE_MKL_SPARSE)
5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5989 #endif
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5992 #if defined(PETSC_HAVE_ELEMENTAL)
5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5994 #endif
5995 #if defined(PETSC_HAVE_SCALAPACK)
5996 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5997 #endif
5998 #if defined(PETSC_HAVE_HYPRE)
5999 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
6000 #endif
6001 #if defined(PETSC_HAVE_CUDA)
6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
6003 #endif
6004 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
6006 #endif
6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
6008 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
6009 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6010 
6011 /*
6012     Computes (B'*A')' since computing B*A directly is untenable
6013 
6014                n                       p                          p
6015         [             ]       [             ]         [                 ]
6016       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6017         [             ]       [             ]         [                 ]
6018 
6019 */
6020 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6021 {
6022   Mat            At,Bt,Ct;
6023 
6024   PetscFunctionBegin;
6025   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
6026   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
6027   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
6028   PetscCall(MatDestroy(&At));
6029   PetscCall(MatDestroy(&Bt));
6030   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
6031   PetscCall(MatDestroy(&Ct));
6032   PetscFunctionReturn(0);
6033 }
6034 
6035 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6036 {
6037   PetscBool      cisdense;
6038 
6039   PetscFunctionBegin;
6040   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6041   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
6042   PetscCall(MatSetBlockSizesFromMats(C,A,B));
6043   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
6044   if (!cisdense) {
6045     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6046   }
6047   PetscCall(MatSetUp(C));
6048 
6049   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6050   PetscFunctionReturn(0);
6051 }
6052 
6053 /* ----------------------------------------------------------------*/
6054 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6055 {
6056   Mat_Product *product = C->product;
6057   Mat         A = product->A,B=product->B;
6058 
6059   PetscFunctionBegin;
6060   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6061     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6062 
6063   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6064   C->ops->productsymbolic = MatProductSymbolic_AB;
6065   PetscFunctionReturn(0);
6066 }
6067 
6068 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6069 {
6070   Mat_Product    *product = C->product;
6071 
6072   PetscFunctionBegin;
6073   if (product->type == MATPRODUCT_AB) {
6074     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6075   }
6076   PetscFunctionReturn(0);
6077 }
6078 
6079 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6080 
6081   Input Parameters:
6082 
6083     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6084     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6085 
6086     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6087 
6088     For Set1, j1[] contains column indices of the nonzeros.
6089     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6091     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6092 
6093     Similar for Set2.
6094 
6095     This routine merges the two sets of nonzeros row by row and removes repeats.
6096 
6097   Output Parameters: (memory is allocated by the caller)
6098 
6099     i[],j[]: the CSR of the merged matrix, which has m rows.
6100     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6101     imap2[]: similar to imap1[], but for Set2.
6102     Note we order nonzeros row-by-row and from left to right.
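
    A small made-up illustration with one local row (m = 1):
      Set1: j1 = [2,2,5], rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]   (column 2 occurs twice, column 5 once)
      Set2: j2 = [3,3,5], rowBegin2 = [0], rowEnd2 = [3], jmap2 = [0,2,3]
    merges to
      i = [0,3], j = [2,3,5], imap1 = [0,2], imap2 = [1,2]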
6103 */
6104 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6105   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6106   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6107 {
6108   PetscInt       r,m; /* Row index of mat */
6109   PetscCount     t,t1,t2,b1,e1,b2,e2;
6110 
6111   PetscFunctionBegin;
6112   PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6114   i[0] = 0;
6115   for (r=0; r<m; r++) { /* Do row by row merging */
6116     b1   = rowBegin1[r];
6117     e1   = rowEnd1[r];
6118     b2   = rowBegin2[r];
6119     e2   = rowEnd2[r];
6120     while (b1 < e1 && b2 < e2) {
6121       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6122         j[t]      = j1[b1];
6123         imap1[t1] = t;
6124         imap2[t2] = t;
6125         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6126         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6127         t1++; t2++; t++;
6128       } else if (j1[b1] < j2[b2]) {
6129         j[t]      = j1[b1];
6130         imap1[t1] = t;
6131         b1       += jmap1[t1+1] - jmap1[t1];
6132         t1++; t++;
6133       } else {
6134         j[t]      = j2[b2];
6135         imap2[t2] = t;
6136         b2       += jmap2[t2+1] - jmap2[t2];
6137         t2++; t++;
6138       }
6139     }
6140     /* Merge the remaining in either j1[] or j2[] */
6141     while (b1 < e1) {
6142       j[t]      = j1[b1];
6143       imap1[t1] = t;
6144       b1       += jmap1[t1+1] - jmap1[t1];
6145       t1++; t++;
6146     }
6147     while (b2 < e2) {
6148       j[t]      = j2[b2];
6149       imap2[t2] = t;
6150       b2       += jmap2[t2+1] - jmap2[t2];
6151       t2++; t++;
6152     }
6153     i[r+1] = t;
6154   }
6155   PetscFunctionReturn(0);
6156 }
6157 
6158 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6159 
6160   Input Parameters:
6161     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6162     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6163       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6164 
6165       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6166       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6167 
6168   Output Parameters:
6169     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6170     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6171       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6172       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6173 
6174     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6175       Atot: number of entries belonging to the diagonal block.
6176       Annz: number of unique nonzeros belonging to the diagonal block.
6177       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6178         repeats (i.e., same 'i,j' pair).
6179       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6180         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6181 
6185     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6186 
6187     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
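
    A small made-up illustration, with one local row and the diagonal block owning columns [0,3):
      input:  n = 4, i = [0,0,0,0], j = [4,1,1,0], perm = [0,1,2,3]
      output: j = [0,1,1,4] (diag columns first), perm = [3,1,2,0] (the two entries of the repeated column 1 may come in either order),
              rowBegin = [0], rowMid = [3], rowEnd = [4],
              Atot = 3, Annz = 2, Aperm = [3,1,2], Ajmap = [0,1,3],
              Btot = 1, Bnnz = 1, Bperm = [0],     Bjmap = [0,1]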
6188 */
6189 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6190   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6191   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6192   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6193 {
6194   PetscInt          cstart,cend,rstart,rend,row,col;
6195   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6196   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6197   PetscCount        k,m,p,q,r,s,mid;
6198   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6199 
6200   PetscFunctionBegin;
6201   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6202   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6203   m    = rend - rstart;
6204 
6205   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6206 
6207   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6208      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6209   */
6210   while (k<n) {
6211     row = i[k];
6212     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6213     for (s=k; s<n; s++) if (i[s] != row) break;
6214     for (p=k; p<s; p++) {
6215       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6216       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6217     }
6218     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6219     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6220     rowBegin[row-rstart] = k;
6221     rowMid[row-rstart]   = mid;
6222     rowEnd[row-rstart]   = s;
6223 
6224     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6225     Atot += mid - k;
6226     Btot += s - mid;
6227 
6228     /* Count unique nonzeros of this diag/offdiag row */
6229     for (p=k; p<mid;) {
6230       col = j[p];
6231       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6232       Annz++;
6233     }
6234 
6235     for (p=mid; p<s;) {
6236       col = j[p];
6237       do {p++;} while (p<s && j[p] == col);
6238       Bnnz++;
6239     }
6240     k = s;
6241   }
6242 
6243   /* Allocation according to Atot, Btot, Annz, Bnnz */
6244   PetscCall(PetscMalloc1(Atot,&Aperm));
6245   PetscCall(PetscMalloc1(Btot,&Bperm));
6246   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6247   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6248 
6249   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6250   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6251   for (r=0; r<m; r++) {
6252     k     = rowBegin[r];
6253     mid   = rowMid[r];
6254     s     = rowEnd[r];
6255     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6256     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6257     Atot += mid - k;
6258     Btot += s - mid;
6259 
6260     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6261     for (p=k; p<mid;) {
6262       col = j[p];
6263       q   = p;
6264       do {p++;} while (p<mid && j[p] == col);
6265       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6266       Annz++;
6267     }
6268 
6269     for (p=mid; p<s;) {
6270       col = j[p];
6271       q   = p;
6272       do {p++;} while (p<s && j[p] == col);
6273       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6274       Bnnz++;
6275     }
6276   }
6277   /* Output */
6278   *Aperm_ = Aperm;
6279   *Annz_  = Annz;
6280   *Atot_  = Atot;
6281   *Ajmap_ = Ajmap;
6282   *Bperm_ = Bperm;
6283   *Bnnz_  = Bnnz;
6284   *Btot_  = Btot;
6285   *Bjmap_ = Bjmap;
6286   PetscFunctionReturn(0);
6287 }
6288 
6289 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6290 
6291   Input Parameters:
6292     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6293     nnz:  number of unique nonzeros in the merged matrix
6294     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6296 
6297   Output Parameter: (memory is allocated by the caller)
6298     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6299 
6300   Example:
6301     nnz1 = 4
6302     nnz  = 6
6303     imap = [1,3,4,5]
6304     jmap = [0,3,5,6,7]
6305    then,
6306     jmap_new = [0,0,3,3,5,6,7]
6307 */
6308 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6309 {
6310   PetscCount k,p;
6311 
6312   PetscFunctionBegin;
6313   jmap_new[0] = 0;
6314   p = nnz; /* p loops over jmap_new[] backwards */
6315   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6316     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6317   }
6318   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6319   PetscFunctionReturn(0);
6320 }
6321 
6322 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6323 {
6324   MPI_Comm                  comm;
6325   PetscMPIInt               rank,size;
6326   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6327   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6328   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6329 
6330   PetscFunctionBegin;
6331   PetscCall(PetscFree(mpiaij->garray));
6332   PetscCall(VecDestroy(&mpiaij->lvec));
6333 #if defined(PETSC_USE_CTABLE)
6334   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6335 #else
6336   PetscCall(PetscFree(mpiaij->colmap));
6337 #endif
6338   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6339   mat->assembled = PETSC_FALSE;
6340   mat->was_assembled = PETSC_FALSE;
6341   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6342 
6343   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6344   PetscCallMPI(MPI_Comm_size(comm,&size));
6345   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6346   PetscCall(PetscLayoutSetUp(mat->rmap));
6347   PetscCall(PetscLayoutSetUp(mat->cmap));
6348   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6349   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6350   PetscCall(MatGetLocalSize(mat,&m,&n));
6351   PetscCall(MatGetSize(mat,&M,&N));
6352 
6353   /* ---------------------------------------------------------------------------*/
6354   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6355   /* entries come first, then local rows, then remote rows.                     */
6356   /* ---------------------------------------------------------------------------*/
6357   PetscCount n1 = coo_n,*perm1;
6358   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6359   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6360   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6361   PetscCall(PetscArraycpy(j1,coo_j,n1));
6362   for (k=0; k<n1; k++) perm1[k] = k;
6363 
6364   /* Manipulate indices so that entries with negative row or col indices will have smallest
6365      row indices, local entries will have greater but negative row indices, and remote entries
6366      will have positive row indices.
6367   */
6368   for (k=0; k<n1; k++) {
6369     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6370     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6371     else {
6372       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6373       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6374     }
6375   }
6376 
  /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6378   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6379   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6380   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6381   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6382 
6383   /* ---------------------------------------------------------------------------*/
6384   /*           Split local rows into diag/offdiag portions                      */
6385   /* ---------------------------------------------------------------------------*/
6386   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6387   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6388   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6389 
6390   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6391   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6392   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6393 
6394   /* ---------------------------------------------------------------------------*/
6395   /*           Send remote rows to their owner                                  */
6396   /* ---------------------------------------------------------------------------*/
6397   /* Find which rows should be sent to which remote ranks*/
6398   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6399   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6400   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6401   const PetscInt *ranges;
6402   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6403 
6404   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6405   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6406   for (k=rem; k<n1;) {
6407     PetscMPIInt  owner;
6408     PetscInt     firstRow,lastRow;
6409 
6410     /* Locate a row range */
6411     firstRow = i1[k]; /* first row of this owner */
6412     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6413     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6414 
6415     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6416     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6417 
6418     /* All entries in [k,p) belong to this remote owner */
6419     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6420       PetscMPIInt *sendto2;
6421       PetscInt    *nentries2;
6422       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6423 
6424       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6425       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6426       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend));
6427       PetscCall(PetscFree2(sendto,nentries));
6428       sendto      = sendto2;
6429       nentries    = nentries2;
6430       maxNsend    = maxNsend2;
6431     }
6432     sendto[nsend]   = owner;
6433     nentries[nsend] = p - k;
6434     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6435     nsend++;
6436     k = p;
6437   }
6438 
6439   /* Build the 1st SF to learn the offsets at which our data should be placed on the remote ranks */
6440   PetscSF     sf1;
6441   PetscInt    nroots = 1,nroots2 = 0;
6442   PetscInt    nleaves = nsend,nleaves2 = 0;
6443   PetscInt    *offsets;
6444   PetscSFNode *iremote;
6445 
6446   PetscCall(PetscSFCreate(comm,&sf1));
6447   PetscCall(PetscMalloc1(nsend,&iremote));
6448   PetscCall(PetscMalloc1(nsend,&offsets));
6449   for (k=0; k<nsend; k++) {
6450     iremote[k].rank  = sendto[k];
6451     iremote[k].index = 0;
6452     nleaves2        += nentries[k];
6453     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6454   }
6455   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6456   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6457   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Should nroots2 overflow, the check on offsets[] below will catch it */
6458   PetscCall(PetscSFDestroy(&sf1));
6459   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
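  /* Illustrative example (made-up numbers): if ranks 0 and 2 both send COO entries to rank 1, say
     3 and 5 entries respectively, then after the fetch-and-op rank 1 ends up with nroots2 = 8 while,
     assuming rank 0's contribution is applied first, rank 0 sees offsets[] = {0} and rank 2 sees
     offsets[] = {3}; i.e., each sender learns where its contiguous chunk starts in the receiver's buffer. */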
6460 
6461   /* Build 2nd SF to send remote COOs to their owner */
6462   PetscSF sf2;
6463   nroots  = nroots2;
6464   nleaves = nleaves2;
6465   PetscCall(PetscSFCreate(comm,&sf2));
6466   PetscCall(PetscSFSetFromOptions(sf2));
6467   PetscCall(PetscMalloc1(nleaves,&iremote));
6468   p       = 0;
6469   for (k=0; k<nsend; k++) {
6470     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6471     for (q=0; q<nentries[k]; q++,p++) {
6472       iremote[p].rank  = sendto[k];
6473       iremote[p].index = offsets[k] + q;
6474     }
6475   }
6476   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6477 
6478   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6479   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6480 
6481   /* Send the remote COOs to their owner */
6482   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6483   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6484   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6485   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6486   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6487   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6488   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6489 
6490   PetscCall(PetscFree(offsets));
6491   PetscCall(PetscFree2(sendto,nentries));
6492 
6493   /* ---------------------------------------------------------------*/
6494   /* Sort received COOs by row along with the permutation array     */
6495   /* ---------------------------------------------------------------*/
6496   for (k=0; k<n2; k++) perm2[k] = k;
6497   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6498 
6499   /* ---------------------------------------------------------------*/
6500   /* Split received COOs into diag/offdiag portions                 */
6501   /* ---------------------------------------------------------------*/
6502   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6503   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6504   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6505 
6506   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6507   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6508 
6509   /* --------------------------------------------------------------------------*/
6510   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6511   /* --------------------------------------------------------------------------*/
6512   PetscInt   *Ai,*Bi;
6513   PetscInt   *Aj,*Bj;
6514 
6515   PetscCall(PetscMalloc1(m+1,&Ai));
6516   PetscCall(PetscMalloc1(m+1,&Bi));
6517   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6518   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6519 
6520   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6521   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6522   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6523   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6524   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6525 
6526   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6527   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6528 
6529   /* --------------------------------------------------------------------------*/
6530   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since we    */
6531   /* expect most nonzeros in A/B to have local contributing entries                    */
6532   /* --------------------------------------------------------------------------*/
6533   PetscInt Annz = Ai[m];
6534   PetscInt Bnnz = Bi[m];
6535   PetscCount *Ajmap1_new,*Bjmap1_new;
6536 
6537   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6538   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6539 
6540   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6541   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
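  /* Illustrative example of the expansion (made-up numbers): suppose A ends up with Annz = 4 nonzeros
     but only Annz1 = 2 of them receive local COO contributions, namely those at positions
     Aimap1[] = {1,3}, with Ajmap1[] = {0,2,5}. Then Ajmap1_new[] = {0,0,2,2,5}: nonzeros 0 and 2 get
     empty ranges while nonzeros 1 and 3 keep theirs, so MatSetValuesCOO_MPIAIJ() can loop over all
     Annz nonzeros uniformly. */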
6542 
6543   PetscCall(PetscFree(Aimap1));
6544   PetscCall(PetscFree(Ajmap1));
6545   PetscCall(PetscFree(Bimap1));
6546   PetscCall(PetscFree(Bjmap1));
6547   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6548   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6549   PetscCall(PetscFree3(i1,j1,perm1));
6550   PetscCall(PetscFree3(i2,j2,perm2));
6551 
6552   Ajmap1 = Ajmap1_new;
6553   Bjmap1 = Bjmap1_new;
6554 
6555   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6556   if (Annz < Annz1 + Annz2) {
6557     PetscInt *Aj_new;
6558     PetscCall(PetscMalloc1(Annz,&Aj_new));
6559     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6560     PetscCall(PetscFree(Aj));
6561     Aj   = Aj_new;
6562   }
6563 
6564   if (Bnnz < Bnnz1 + Bnnz2) {
6565     PetscInt *Bj_new;
6566     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6567     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6568     PetscCall(PetscFree(Bj));
6569     Bj   = Bj_new;
6570   }
6571 
6572   /* --------------------------------------------------------------------------------*/
6573   /* Create new submatrices for on-process and off-process coupling                  */
6574   /* --------------------------------------------------------------------------------*/
6575   PetscScalar   *Aa,*Ba;
6576   MatType       rtype;
6577   Mat_SeqAIJ    *a,*b;
6578   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6579   PetscCall(PetscCalloc1(Bnnz,&Ba));
6580   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6581   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6582   PetscCall(MatDestroy(&mpiaij->A));
6583   PetscCall(MatDestroy(&mpiaij->B));
6584   PetscCall(MatGetRootType_Private(mat,&rtype));
6585   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6586   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6587   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6588 
6589   a = (Mat_SeqAIJ*)mpiaij->A->data;
6590   b = (Mat_SeqAIJ*)mpiaij->B->data;
6591   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6592   a->free_a       = b->free_a       = PETSC_TRUE;
6593   a->free_ij      = b->free_ij      = PETSC_TRUE;
6594 
6595   /* conversion must happen AFTER multiply setup */
6596   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6597   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6598   PetscCall(VecDestroy(&mpiaij->lvec));
6599   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6600   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6601 
6602   mpiaij->coo_n   = coo_n;
6603   mpiaij->coo_sf  = sf2;
6604   mpiaij->sendlen = nleaves;
6605   mpiaij->recvlen = nroots;
6606 
6607   mpiaij->Annz    = Annz;
6608   mpiaij->Bnnz    = Bnnz;
6609 
6610   mpiaij->Annz2   = Annz2;
6611   mpiaij->Bnnz2   = Bnnz2;
6612 
6613   mpiaij->Atot1   = Atot1;
6614   mpiaij->Atot2   = Atot2;
6615   mpiaij->Btot1   = Btot1;
6616   mpiaij->Btot2   = Btot2;
6617 
6618   mpiaij->Ajmap1  = Ajmap1;
6619   mpiaij->Aperm1  = Aperm1;
6620 
6621   mpiaij->Bjmap1  = Bjmap1;
6622   mpiaij->Bperm1  = Bperm1;
6623 
6624   mpiaij->Aimap2  = Aimap2;
6625   mpiaij->Ajmap2  = Ajmap2;
6626   mpiaij->Aperm2  = Aperm2;
6627 
6628   mpiaij->Bimap2  = Bimap2;
6629   mpiaij->Bjmap2  = Bjmap2;
6630   mpiaij->Bperm2  = Bperm2;
6631 
6632   mpiaij->Cperm1  = Cperm1;
6633 
6634   /* Allocate in preallocation. If not used, it has zero cost on host */
6635   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6636   PetscFunctionReturn(0);
6637 }
6638 
6639 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6640 {
6641   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6642   Mat                  A = mpiaij->A,B = mpiaij->B;
6643   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6644   PetscScalar          *Aa,*Ba;
6645   PetscScalar          *sendbuf = mpiaij->sendbuf;
6646   PetscScalar          *recvbuf = mpiaij->recvbuf;
6647   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6648   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6649   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6650   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6651 
6652   PetscFunctionBegin;
6653   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6654   PetscCall(MatSeqAIJGetArray(B,&Ba));
6655 
6656   /* Pack entries to be sent to remote */
6657   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6658 
6659   /* Send remote entries to their owner and overlap the communication with local computation */
6660   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6661   /* Add local entries to A and B */
6662   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zeroed or added with a value (i.e., initialized) */
6663     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6664     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6665     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6666   }
6667   for (PetscCount i=0; i<Bnnz; i++) {
6668     PetscScalar sum = 0.0;
6669     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6670     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6671   }
6672   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6673 
6674   /* Add received remote entries to A and B */
6675   for (PetscCount i=0; i<Annz2; i++) {
6676     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6677   }
6678   for (PetscCount i=0; i<Bnnz2; i++) {
6679     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6680   }
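  /* Summary of the assembly above, for a nonzero i of the diagonal block A (B is analogous):
       Aa[i] = (imode == INSERT_VALUES ? 0 : Aa[i])
             + sum over k in [Ajmap1[i],Ajmap1[i+1]) of v[Aperm1[k]]         (local contributions)
             + sum over k in [Ajmap2[t],Ajmap2[t+1]) of recvbuf[Aperm2[k]]   for each t with Aimap2[t] == i (remote contributions) */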
6681   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6682   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6683   PetscFunctionReturn(0);
6684 }
6685 
6686 /* ----------------------------------------------------------------*/
6687 
6688 /*MC
6689    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6690 
6691    Options Database Keys:
6692 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6693 
6694    Level: beginner
6695 
6696    Notes:
6697     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6698     in this case the values associated with the rows and columns one passes in are set to zero
6699     in the matrix.
6700 
6701     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6702     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6703 
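   Example:
   A minimal usage sketch (not taken from this file; M and N stand for global sizes chosen by the caller,
   and 5 and 2 are rough per-row nonzero estimates for the diagonal and off-diagonal blocks):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
   MatSetValues(A,...);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
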
6704 .seealso: `MatCreateAIJ()`
6705 M*/
6706 
6707 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6708 {
6709   Mat_MPIAIJ     *b;
6710   PetscMPIInt    size;
6711 
6712   PetscFunctionBegin;
6713   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6714 
6715   PetscCall(PetscNewLog(B,&b));
6716   B->data       = (void*)b;
6717   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6718   B->assembled  = PETSC_FALSE;
6719   B->insertmode = NOT_SET_VALUES;
6720   b->size       = size;
6721 
6722   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6723 
6724   /* build cache for off array entries formed */
6725   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6726 
6727   b->donotstash  = PETSC_FALSE;
6728   b->colmap      = NULL;
6729   b->garray      = NULL;
6730   b->roworiented = PETSC_TRUE;
6731 
6732   /* stuff used for matrix vector multiply */
6733   b->lvec  = NULL;
6734   b->Mvctx = NULL;
6735 
6736   /* stuff for MatGetRow() */
6737   b->rowindices   = NULL;
6738   b->rowvalues    = NULL;
6739   b->getrowactive = PETSC_FALSE;
6740 
6741   /* flexible pointer used in CUSPARSE classes */
6742   b->spptr = NULL;
6743 
6744   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6745   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6746   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6747   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6748   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6749   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6750   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6751   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6752   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6753   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6754 #if defined(PETSC_HAVE_CUDA)
6755   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6756 #endif
6757 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6758   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6759 #endif
6760 #if defined(PETSC_HAVE_MKL_SPARSE)
6761   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6762 #endif
6763   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6764   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6765   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6766   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6767 #if defined(PETSC_HAVE_ELEMENTAL)
6768   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6769 #endif
6770 #if defined(PETSC_HAVE_SCALAPACK)
6771   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6772 #endif
6773   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6774   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6775 #if defined(PETSC_HAVE_HYPRE)
6776   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6777   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6778 #endif
6779   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6780   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6781   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6782   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6783   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6784   PetscFunctionReturn(0);
6785 }
6786 
6787 /*@C
6788      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6789          and "off-diagonal" part of the matrix in CSR format.
6790 
6791    Collective
6792 
6793    Input Parameters:
6794 +  comm - MPI communicator
6795 .  m - number of local rows (Cannot be PETSC_DECIDE)
6796 .  n - This value should be the same as the local size used in creating the
6797        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6798        it calculated if N is given). For square matrices n is almost always m.
6799 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6800 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6801 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6802 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6803 .   a - matrix values
6804 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6805 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6806 -   oa - matrix values
6807 
6808    Output Parameter:
6809 .   mat - the matrix
6810 
6811    Level: advanced
6812 
6813    Notes:
6814        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6815        must free the arrays once the matrix has been destroyed and not before.
6816 
6817        The i and j indices are 0 based
6818 
6819        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6820 
6821        This sets local rows and cannot be used to set off-processor values.
6822 
6823        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6824        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6825        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6826        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6827        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6828        communication if it is known that only local entries will be set.
6829 
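       As an illustration of the expected layout (a sketch with made-up numbers): a process owning 2 rows
       whose diagonal block is [1 0; 0 2] and whose off-diagonal block has a single entry 3.0 in its second
       row (global column 7) would pass
.vb
       i[]  = {0,1,2};   j[]  = {0,1};   a[]  = {1.0,2.0};
       oi[] = {0,0,1};   oj[] = {7};     oa[] = {3.0};
.ve
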
6830 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6831           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6832 @*/
6833 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6834 {
6835   Mat_MPIAIJ     *maij;
6836 
6837   PetscFunctionBegin;
6838   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6839   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6840   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6841   PetscCall(MatCreate(comm,mat));
6842   PetscCall(MatSetSizes(*mat,m,n,M,N));
6843   PetscCall(MatSetType(*mat,MATMPIAIJ));
6844   maij = (Mat_MPIAIJ*) (*mat)->data;
6845 
6846   (*mat)->preallocated = PETSC_TRUE;
6847 
6848   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6849   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6850 
6851   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6852   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6853 
6854   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6855   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6856   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6857   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6858   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6859   PetscFunctionReturn(0);
6860 }
6861 
6862 typedef struct {
6863   Mat       *mp;    /* intermediate products */
6864   PetscBool *mptmp; /* is the intermediate product temporary? */
6865   PetscInt  cp;     /* number of intermediate products */
6866 
6867   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6868   PetscInt    *startsj_s,*startsj_r;
6869   PetscScalar *bufa;
6870   Mat         P_oth;
6871 
6872   /* may take advantage of merging product->B */
6873   Mat Bloc; /* B-local by merging diag and off-diag */
6874 
6875   /* cusparse does not support splitting the symbolic and numeric phases.
6876      When api_user is true, we don't need to update the numerical values
6877      of the temporary storage */
6878   PetscBool reusesym;
6879 
6880   /* support for COO values insertion */
6881   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6882   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6883   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6884   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6885   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6886   PetscMemType mtype;
6887 
6888   /* customization */
6889   PetscBool abmerge;
6890   PetscBool P_oth_bind;
6891 } MatMatMPIAIJBACKEND;
6892 
6893 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6894 {
6895   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6896   PetscInt            i;
6897 
6898   PetscFunctionBegin;
6899   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6900   PetscCall(PetscFree(mmdata->bufa));
6901   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6902   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6903   PetscCall(MatDestroy(&mmdata->P_oth));
6904   PetscCall(MatDestroy(&mmdata->Bloc));
6905   PetscCall(PetscSFDestroy(&mmdata->sf));
6906   for (i = 0; i < mmdata->cp; i++) {
6907     PetscCall(MatDestroy(&mmdata->mp[i]));
6908   }
6909   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6910   PetscCall(PetscFree(mmdata->own[0]));
6911   PetscCall(PetscFree(mmdata->own));
6912   PetscCall(PetscFree(mmdata->off[0]));
6913   PetscCall(PetscFree(mmdata->off));
6914   PetscCall(PetscFree(mmdata));
6915   PetscFunctionReturn(0);
6916 }
6917 
6918 /* Copy selected n entries with indices in idx[] of A to v[].
6919    If idx is NULL, copy the whole data array of A to v[]
6920  */
6921 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6922 {
6923   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6924 
6925   PetscFunctionBegin;
6926   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6927   if (f) {
6928     PetscCall((*f)(A,n,idx,v));
6929   } else {
6930     const PetscScalar *vv;
6931 
6932     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6933     if (n && idx) {
6934       PetscScalar    *w = v;
6935       const PetscInt *oi = idx;
6936       PetscInt       j;
6937 
6938       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6939     } else {
6940       PetscCall(PetscArraycpy(v,vv,n));
6941     }
6942     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6943   }
6944   PetscFunctionReturn(0);
6945 }
6946 
6947 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6948 {
6949   MatMatMPIAIJBACKEND *mmdata;
6950   PetscInt            i,n_d,n_o;
6951 
6952   PetscFunctionBegin;
6953   MatCheckProduct(C,1);
6954   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6955   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6956   if (!mmdata->reusesym) { /* update temporary matrices */
6957     if (mmdata->P_oth) {
6958       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6959     }
6960     if (mmdata->Bloc) {
6961       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6962     }
6963   }
6964   mmdata->reusesym = PETSC_FALSE;
6965 
6966   for (i = 0; i < mmdata->cp; i++) {
6967     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6968     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6969   }
6970   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6971     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6972 
6973     if (mmdata->mptmp[i]) continue;
6974     if (noff) {
6975       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6976 
6977       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6978       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6979       n_o += noff;
6980       n_d += nown;
6981     } else {
6982       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6983 
6984       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6985       n_d += mm->nz;
6986     }
6987   }
6988   if (mmdata->hasoffproc) { /* offprocess insertion */
6989     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6990     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6991   }
6992   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6993   PetscFunctionReturn(0);
6994 }
6995 
6996 /* Support for Pt * A, A * P, or Pt * A * P */
6997 #define MAX_NUMBER_INTERMEDIATE 4
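/*
   Reading aid for the decompositions used below (A_d/A_o denote the diagonal/off-diagonal blocks of the
   locally owned rows of A, P_loc the merged locally owned rows of P, and P_oth the remote rows of P
   gathered with MatGetBrowsOfAoCols_MPIAIJ(); the code below is the authoritative version):

     A * P       : C_loc = A_d * P_loc + A_o * P_oth                          (no off-process insertion)
     P^T * A     : C     = P_d^T * A_loc + P_o^T * A_loc                      (second term targets off-process rows)
     P^T * A * P : C     = P_loc^T * (A_d * P_loc) + P_loc^T * (A_o * P_oth)  (second term uses a temporary product)
*/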
6998 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6999 {
7000   Mat_Product            *product = C->product;
7001   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7002   Mat_MPIAIJ             *a,*p;
7003   MatMatMPIAIJBACKEND    *mmdata;
7004   ISLocalToGlobalMapping P_oth_l2g = NULL;
7005   IS                     glob = NULL;
7006   const char             *prefix;
7007   char                   pprefix[256];
7008   const PetscInt         *globidx,*P_oth_idx;
7009   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
7010   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
7011   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7012                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
7013                                                                                         /* a base offset; type-2: sparse with a local to global map table */
7014   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
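  /* Reading aid (the fill loops below are authoritative): rmapt[i] == 0 means the row indices of mp[i]
     are already global rows of C; rmapt[i] == 1 means row r of mp[i] corresponds to global row
     r + C->rmap->rstart; rmapt[i] == 2 means row r corresponds to global row rmapa[i][r]. The
     cmapt[]/cmapa[] pairs play the same role for columns, with C->cmap->rstart as the type-1 offset. */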
7015 
7016   MatProductType         ptype;
7017   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
7018   PetscMPIInt            size;
7019 
7020   PetscFunctionBegin;
7021   MatCheckProduct(C,1);
7022   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
7023   ptype = product->type;
7024   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
7025     ptype = MATPRODUCT_AB;
7026     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7027   }
7028   switch (ptype) {
7029   case MATPRODUCT_AB:
7030     A = product->A;
7031     P = product->B;
7032     m = A->rmap->n;
7033     n = P->cmap->n;
7034     M = A->rmap->N;
7035     N = P->cmap->N;
7036     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7037     break;
7038   case MATPRODUCT_AtB:
7039     P = product->A;
7040     A = product->B;
7041     m = P->cmap->n;
7042     n = A->cmap->n;
7043     M = P->cmap->N;
7044     N = A->cmap->N;
7045     hasoffproc = PETSC_TRUE;
7046     break;
7047   case MATPRODUCT_PtAP:
7048     A = product->A;
7049     P = product->B;
7050     m = P->cmap->n;
7051     n = P->cmap->n;
7052     M = P->cmap->N;
7053     N = P->cmap->N;
7054     hasoffproc = PETSC_TRUE;
7055     break;
7056   default:
7057     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7058   }
7059   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
7060   if (size == 1) hasoffproc = PETSC_FALSE;
7061 
7062   /* defaults */
7063   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
7064     mp[i]    = NULL;
7065     mptmp[i] = PETSC_FALSE;
7066     rmapt[i] = -1;
7067     cmapt[i] = -1;
7068     rmapa[i] = NULL;
7069     cmapa[i] = NULL;
7070   }
7071 
7072   /* customization */
7073   PetscCall(PetscNew(&mmdata));
7074   mmdata->reusesym = product->api_user;
7075   if (ptype == MATPRODUCT_AB) {
7076     if (product->api_user) {
7077       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
7078       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7079       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7080       PetscOptionsEnd();
7081     } else {
7082       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
7083       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7084       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7085       PetscOptionsEnd();
7086     }
7087   } else if (ptype == MATPRODUCT_PtAP) {
7088     if (product->api_user) {
7089       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7090       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7091       PetscOptionsEnd();
7092     } else {
7093       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7094       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7095       PetscOptionsEnd();
7096     }
7097   }
7098   a = (Mat_MPIAIJ*)A->data;
7099   p = (Mat_MPIAIJ*)P->data;
7100   PetscCall(MatSetSizes(C,m,n,M,N));
7101   PetscCall(PetscLayoutSetUp(C->rmap));
7102   PetscCall(PetscLayoutSetUp(C->cmap));
7103   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7104   PetscCall(MatGetOptionsPrefix(C,&prefix));
7105 
7106   cp   = 0;
7107   switch (ptype) {
7108   case MATPRODUCT_AB: /* A * P */
7109     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7110 
7111     /* A_diag * P_local (merged or not) */
7112     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7113       /* P is product->B */
7114       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7115       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7116       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7117       PetscCall(MatProductSetFill(mp[cp],product->fill));
7118       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7119       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7120       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7121       mp[cp]->product->api_user = product->api_user;
7122       PetscCall(MatProductSetFromOptions(mp[cp]));
7123       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7124       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7125       PetscCall(ISGetIndices(glob,&globidx));
7126       rmapt[cp] = 1;
7127       cmapt[cp] = 2;
7128       cmapa[cp] = globidx;
7129       mptmp[cp] = PETSC_FALSE;
7130       cp++;
7131     } else { /* A_diag * P_diag and A_diag * P_off */
7132       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7133       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7134       PetscCall(MatProductSetFill(mp[cp],product->fill));
7135       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7136       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7137       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7138       mp[cp]->product->api_user = product->api_user;
7139       PetscCall(MatProductSetFromOptions(mp[cp]));
7140       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7141       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7142       rmapt[cp] = 1;
7143       cmapt[cp] = 1;
7144       mptmp[cp] = PETSC_FALSE;
7145       cp++;
7146       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7147       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7148       PetscCall(MatProductSetFill(mp[cp],product->fill));
7149       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7150       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7151       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7152       mp[cp]->product->api_user = product->api_user;
7153       PetscCall(MatProductSetFromOptions(mp[cp]));
7154       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7155       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7156       rmapt[cp] = 1;
7157       cmapt[cp] = 2;
7158       cmapa[cp] = p->garray;
7159       mptmp[cp] = PETSC_FALSE;
7160       cp++;
7161     }
7162 
7163     /* A_off * P_other */
7164     if (mmdata->P_oth) {
7165       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7166       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7167       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7168       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7169       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7170       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7171       PetscCall(MatProductSetFill(mp[cp],product->fill));
7172       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7173       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7174       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7175       mp[cp]->product->api_user = product->api_user;
7176       PetscCall(MatProductSetFromOptions(mp[cp]));
7177       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7178       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7179       rmapt[cp] = 1;
7180       cmapt[cp] = 2;
7181       cmapa[cp] = P_oth_idx;
7182       mptmp[cp] = PETSC_FALSE;
7183       cp++;
7184     }
7185     break;
7186 
7187   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7188     /* A is product->B */
7189     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7190     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7191       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7192       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7193       PetscCall(MatProductSetFill(mp[cp],product->fill));
7194       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7195       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7196       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7197       mp[cp]->product->api_user = product->api_user;
7198       PetscCall(MatProductSetFromOptions(mp[cp]));
7199       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7200       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7201       PetscCall(ISGetIndices(glob,&globidx));
7202       rmapt[cp] = 2;
7203       rmapa[cp] = globidx;
7204       cmapt[cp] = 2;
7205       cmapa[cp] = globidx;
7206       mptmp[cp] = PETSC_FALSE;
7207       cp++;
7208     } else {
7209       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7210       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7211       PetscCall(MatProductSetFill(mp[cp],product->fill));
7212       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7213       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7214       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7215       mp[cp]->product->api_user = product->api_user;
7216       PetscCall(MatProductSetFromOptions(mp[cp]));
7217       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7218       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7219       PetscCall(ISGetIndices(glob,&globidx));
7220       rmapt[cp] = 1;
7221       cmapt[cp] = 2;
7222       cmapa[cp] = globidx;
7223       mptmp[cp] = PETSC_FALSE;
7224       cp++;
7225       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7226       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7227       PetscCall(MatProductSetFill(mp[cp],product->fill));
7228       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7229       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7230       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7231       mp[cp]->product->api_user = product->api_user;
7232       PetscCall(MatProductSetFromOptions(mp[cp]));
7233       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7234       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7235       rmapt[cp] = 2;
7236       rmapa[cp] = p->garray;
7237       cmapt[cp] = 2;
7238       cmapa[cp] = globidx;
7239       mptmp[cp] = PETSC_FALSE;
7240       cp++;
7241     }
7242     break;
7243   case MATPRODUCT_PtAP:
7244     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7245     /* P is product->B */
7246     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7247     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7248     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7249     PetscCall(MatProductSetFill(mp[cp],product->fill));
7250     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7251     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7252     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7253     mp[cp]->product->api_user = product->api_user;
7254     PetscCall(MatProductSetFromOptions(mp[cp]));
7255     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7256     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7257     PetscCall(ISGetIndices(glob,&globidx));
7258     rmapt[cp] = 2;
7259     rmapa[cp] = globidx;
7260     cmapt[cp] = 2;
7261     cmapa[cp] = globidx;
7262     mptmp[cp] = PETSC_FALSE;
7263     cp++;
7264     if (mmdata->P_oth) {
7265       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7266       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7267       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7268       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7269       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7270       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7271       PetscCall(MatProductSetFill(mp[cp],product->fill));
7272       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7273       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7274       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7275       mp[cp]->product->api_user = product->api_user;
7276       PetscCall(MatProductSetFromOptions(mp[cp]));
7277       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7278       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7279       mptmp[cp] = PETSC_TRUE;
7280       cp++;
7281       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7282       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7283       PetscCall(MatProductSetFill(mp[cp],product->fill));
7284       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7285       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7286       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7287       mp[cp]->product->api_user = product->api_user;
7288       PetscCall(MatProductSetFromOptions(mp[cp]));
7289       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       rmapt[cp] = 2;
7292       rmapa[cp] = globidx;
7293       cmapt[cp] = 2;
7294       cmapa[cp] = P_oth_idx;
7295       mptmp[cp] = PETSC_FALSE;
7296       cp++;
7297     }
7298     break;
7299   default:
7300     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7301   }
7302   /* sanity check */
7303   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7304 
7305   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7306   for (i = 0; i < cp; i++) {
7307     mmdata->mp[i]    = mp[i];
7308     mmdata->mptmp[i] = mptmp[i];
7309   }
7310   mmdata->cp = cp;
7311   C->product->data       = mmdata;
7312   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7313   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7314 
7315   /* memory type */
7316   mmdata->mtype = PETSC_MEMTYPE_HOST;
7317   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7318   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7319   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7320   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7321 
7322   /* prepare coo coordinates for values insertion */
7323 
7324   /* count total nonzeros of those intermediate seqaij Mats
7325     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7326     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7327     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7328   */
7329   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7330     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7331     if (mptmp[cp]) continue;
7332     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7333       const PetscInt *rmap = rmapa[cp];
7334       const PetscInt mr = mp[cp]->rmap->n;
7335       const PetscInt rs = C->rmap->rstart;
7336       const PetscInt re = C->rmap->rend;
7337       const PetscInt *ii  = mm->i;
7338       for (i = 0; i < mr; i++) {
7339         const PetscInt gr = rmap[i];
7340         const PetscInt nz = ii[i+1] - ii[i];
7341         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7342         else ncoo_oown += nz; /* this row is local */
7343       }
7344     } else ncoo_d += mm->nz;
7345   }
7346 
7347   /*
7348     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7349 
7350     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7351 
7352     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7353 
7354     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
7355     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7356     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7357 
7358     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7359     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7360   */
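  /* A made-up example as a reading aid: say there are two kept products, mp[0] whose rows are all owned
     by this process and which has 4 nonzeros, and mp[1] with rmapt == 2 and 5 nonzeros of which 2 fall
     in remote rows. Then ncoo_d = 4, ncoo_oown = 3, ncoo_o = 2; off[0] points to an index array of
     length 2 with off[1] = off[0] and off[2] = off[0]+2; own[0] points to one of length 3 with
     own[1] = own[0] and own[2] = own[0]+3; and coo_i/j/v have length ncoo = 4 + 3 + ncoo2, the last
     ncoo2 entries being those received from other ranks. */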
7361   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7362   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7363 
7364   /* gather (i,j) of nonzeros inserted by remote procs */
7365   if (hasoffproc) {
7366     PetscSF  msf;
7367     PetscInt ncoo2,*coo_i2,*coo_j2;
7368 
7369     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7370     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7371     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7372 
7373     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7374       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7375       PetscInt   *idxoff = mmdata->off[cp];
7376       PetscInt   *idxown = mmdata->own[cp];
7377       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7378         const PetscInt *rmap = rmapa[cp];
7379         const PetscInt *cmap = cmapa[cp];
7380         const PetscInt *ii  = mm->i;
7381         PetscInt       *coi = coo_i + ncoo_o;
7382         PetscInt       *coj = coo_j + ncoo_o;
7383         const PetscInt mr = mp[cp]->rmap->n;
7384         const PetscInt rs = C->rmap->rstart;
7385         const PetscInt re = C->rmap->rend;
7386         const PetscInt cs = C->cmap->rstart;
7387         for (i = 0; i < mr; i++) {
7388           const PetscInt *jj = mm->j + ii[i];
7389           const PetscInt gr  = rmap[i];
7390           const PetscInt nz  = ii[i+1] - ii[i];
7391           if (gr < rs || gr >= re) { /* this is an offproc row */
7392             for (j = ii[i]; j < ii[i+1]; j++) {
7393               *coi++ = gr;
7394               *idxoff++ = j;
7395             }
7396             if (!cmapt[cp]) { /* already global */
7397               for (j = 0; j < nz; j++) *coj++ = jj[j];
7398             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7399               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7400             } else { /* offdiag */
7401               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7402             }
7403             ncoo_o += nz;
7404           } else { /* this is a local row */
7405             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7406           }
7407         }
7408       }
7409       mmdata->off[cp + 1] = idxoff;
7410       mmdata->own[cp + 1] = idxown;
7411     }
7412 
7413     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7414     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7415     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7416     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7417     ncoo = ncoo_d + ncoo_oown + ncoo2;
7418     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7419     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7420     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7421     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7422     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7423     PetscCall(PetscFree2(coo_i,coo_j));
7424     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7425     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7426     coo_i = coo_i2;
7427     coo_j = coo_j2;
7428   } else { /* no offproc values insertion */
7429     ncoo = ncoo_d;
7430     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7431 
7432     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7433     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7434     PetscCall(PetscSFSetUp(mmdata->sf));
7435   }
7436   mmdata->hasoffproc = hasoffproc;
7437 
7438   /* gather (i,j) of nonzeros inserted locally */
7439   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7440     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7441     PetscInt       *coi = coo_i + ncoo_d;
7442     PetscInt       *coj = coo_j + ncoo_d;
7443     const PetscInt *jj  = mm->j;
7444     const PetscInt *ii  = mm->i;
7445     const PetscInt *cmap = cmapa[cp];
7446     const PetscInt *rmap = rmapa[cp];
7447     const PetscInt mr = mp[cp]->rmap->n;
7448     const PetscInt rs = C->rmap->rstart;
7449     const PetscInt re = C->rmap->rend;
7450     const PetscInt cs = C->cmap->rstart;
7451 
7452     if (mptmp[cp]) continue;
7453     if (rmapt[cp] == 1) { /* consecutive rows */
7454       /* fill coo_i */
7455       for (i = 0; i < mr; i++) {
7456         const PetscInt gr = i + rs;
7457         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7458       }
7459       /* fill coo_j */
7460       if (!cmapt[cp]) { /* type-0, already global */
7461         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7462       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7463         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7464       } else { /* type-2, local to global for sparse columns */
7465         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7466       }
7467       ncoo_d += mm->nz;
7468     } else if (rmapt[cp] == 2) { /* sparse rows */
7469       for (i = 0; i < mr; i++) {
7470         const PetscInt *jj = mm->j + ii[i];
7471         const PetscInt gr  = rmap[i];
7472         const PetscInt nz  = ii[i+1] - ii[i];
7473         if (gr >= rs && gr < re) { /* local rows */
7474           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7475           if (!cmapt[cp]) { /* type-0, already global */
7476             for (j = 0; j < nz; j++) *coj++ = jj[j];
7477           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7478             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7479           } else { /* type-2, local to global for sparse columns */
7480             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7481           }
7482           ncoo_d += nz;
7483         }
7484       }
7485     }
7486   }
7487   if (glob) {
7488     PetscCall(ISRestoreIndices(glob,&globidx));
7489   }
7490   PetscCall(ISDestroy(&glob));
7491   if (P_oth_l2g) {
7492     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7493   }
7494   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7495   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7496   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7497 
7498   /* preallocate with COO data */
7499   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7500   PetscCall(PetscFree2(coo_i,coo_j));
7501   PetscFunctionReturn(0);
7502 }
7503 
7504 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7505 {
7506   Mat_Product *product = mat->product;
7507 #if defined(PETSC_HAVE_DEVICE)
7508   PetscBool    match   = PETSC_FALSE;
7509   PetscBool    usecpu  = PETSC_FALSE;
7510 #else
7511   PetscBool    match   = PETSC_TRUE;
7512 #endif
7513 
7514   PetscFunctionBegin;
7515   MatCheckProduct(mat,1);
7516 #if defined(PETSC_HAVE_DEVICE)
7517   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7518     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7519   }
7520   if (match) { /* we can always fallback to the CPU if requested */
7521     switch (product->type) {
7522     case MATPRODUCT_AB:
7523       if (product->api_user) {
7524         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7525         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7526         PetscOptionsEnd();
7527       } else {
7528         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7529         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7530         PetscOptionsEnd();
7531       }
7532       break;
7533     case MATPRODUCT_AtB:
7534       if (product->api_user) {
7535         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7536         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7537         PetscOptionsEnd();
7538       } else {
7539         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7540         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7541         PetscOptionsEnd();
7542       }
7543       break;
7544     case MATPRODUCT_PtAP:
7545       if (product->api_user) {
7546         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7547         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7548         PetscOptionsEnd();
7549       } else {
7550         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7551         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7552         PetscOptionsEnd();
7553       }
7554       break;
7555     default:
7556       break;
7557     }
7558     match = (PetscBool)!usecpu;
7559   }
7560 #endif
7561   if (match) {
7562     switch (product->type) {
7563     case MATPRODUCT_AB:
7564     case MATPRODUCT_AtB:
7565     case MATPRODUCT_PtAP:
7566       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7567       break;
7568     default:
7569       break;
7570     }
7571   }
7572   /* fallback to MPIAIJ ops */
7573   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7574   PetscFunctionReturn(0);
7575 }
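/* A minimal caller sketch (hypothetical, for illustration only; see the MatProduct manual pages for
   the definitive workflow) showing where the routine above is triggered:

     Mat A,B,C;
     ... create and assemble A and B as MPIAIJ (or a device subclass) ...
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_AB));
     PetscCall(MatProductSetFromOptions(C));   // may select MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));

   At run time the CPU fallback registered above can be requested with -matmatmult_backend_cpu
   (MatMatMult() users) or -mat_product_algorithm_backend_cpu (MatProduct API users). */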
7576 
7577 /*
7578    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7579 
7580    n - the number of block indices in cc[]
7581    cc - the block indices (must be large enough to contain the indices)
7582 */
7583 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7584 {
7585   PetscInt       cnt = -1,nidx,j;
7586   const PetscInt *idx;
7587 
7588   PetscFunctionBegin;
7589   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7590   if (nidx) {
7591     cnt = 0;
7592     cc[cnt] = idx[0]/bs;
7593     for (j=1; j<nidx; j++) {
7594       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7595     }
7596   }
7597   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7598   *n = cnt+1;
7599   PetscFunctionReturn(0);
7600 }
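/* Worked illustration (hypothetical values, not from a PETSc test): with bs = 2, a row whose scalar
   column indices are {0, 1, 4, 5, 9} collapses to the block column indices {0, 2, 4}, so *n == 3.
   The caller must size cc[] for the worst case, e.g. the maximum number of nonzeros in any row. */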
7601 
7602 /*
7603     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7604 
7605     ncollapsed - the number of block indices
7606     collapsed - the block indices (must be large enough to contain the indices)
7607 */
7608 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7609 {
7610   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7611 
7612   PetscFunctionBegin;
7613   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7614   for (i=start+1; i<start+bs; i++) {
7615     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7616     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7617     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7618   }
7619   *ncollapsed = nprev;
7620   if (collapsed) *collapsed  = cprev;
7621   PetscFunctionReturn(0);
7622 }
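/* Worked illustration (hypothetical values): with bs = 2 and start = 0, if row 0 collapses to the
   block indices {0, 3} and row 1 collapses to {1, 3}, the merged result is {0, 1, 3} and
   *ncollapsed == 3. The work arrays w0, w1, w2 must each be large enough to hold such a merge. */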
7623 
7624 /* -------------------------------------------------------------------------- */
7625 /*
7626  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7627 
7628  Input Parameters:
7629  + Amat - matrix
7630  . symmetrize - make the result symmetric
7631  - scale - scale with diagonal
7632 
7633  Output Parameter:
7634  . a_Gmat - output scalar graph with values >= 0
7635 
7636  */
7637 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7638 {
7639   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7640   MPI_Comm       comm;
7641   Mat            Gmat;
7642   PetscBool      ismpiaij,isseqaij;
7643   Mat            a, b, c;
7644   MatType        jtype;
7645 
7646   PetscFunctionBegin;
7647   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7648   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7649   PetscCall(MatGetSize(Amat, &MM, &NN));
7650   PetscCall(MatGetBlockSize(Amat, &bs));
7651   nloc = (Iend-Istart)/bs;
7652 
7653   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7654   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7655   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7656 
7657   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7658   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7659      implementation */
7660   if (bs > 1) {
7661     PetscCall(MatGetType(Amat,&jtype));
7662     PetscCall(MatCreate(comm, &Gmat));
7663     PetscCall(MatSetType(Gmat, jtype));
7664     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7665     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7666     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7667       PetscInt  *d_nnz, *o_nnz;
7668       MatScalar *aa,val,AA[4096];
7669       PetscInt  *aj,*ai,AJ[4096],nc;
7670       if (isseqaij) { a = Amat; b = NULL; }
7671       else {
7672         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7673         a = d->A; b = d->B;
7674       }
7675       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7676       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7677       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7678         PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
7679         const PetscInt *cols;
7680         for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
7681           PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
7682           nnz[brow/bs] = jj/bs;
7683           if (jj%bs) ok = 0;
7684           if (cols) j0 = cols[0];
7685           else j0 = -1;
7686           PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
7687           if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
7688           for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
7689             PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
7690             if (jj%bs) ok = 0;
7691             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7692             if (nnz[brow/bs] != jj/bs) ok = 0;
7693             PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
7694           }
7695           if (!ok) {
7696             PetscCall(PetscFree2(d_nnz,o_nnz));
7697             goto old_bs;
7698           }
7699         }
7700         PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer of size 4096 too small: %" PetscInt_FMT " entries needed",nmax);
7701       }
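      /* Illustration of the dense-block assumption checked above (hypothetical values): with bs = 3,
         a block row whose three scalar rows each have 6 nonzeros starting at the same first column
         index is accepted and contributes 2 block nonzeros; if any of the three rows differs in
         length or in its first column, the code falls back to the general path at old_bs. */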
7702       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7703       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7704       PetscCall(PetscFree2(d_nnz,o_nnz));
7705       // diag
7706       for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
7707         Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
7708         ai = aseq->i;
7709         n  = ai[brow+1] - ai[brow];
7710         aj = aseq->j + ai[brow];
7711         for (int k=0; k<n; k += bs) { // block columns
7712           AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
7713           val = 0;
7714           for (int ii=0; ii<bs; ii++) { // rows in block
7715             aa = aseq->a + ai[brow+ii] + k;
7716             for (int jj=0; jj<bs; jj++) { // columns in block
7717               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7718             }
7719           }
7720           AA[k/bs] = val;
7721         }
7722         grow = Istart/bs + brow/bs;
7723         PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
7724       }
7725       // off-diag
7726       if (ismpiaij) {
7727         Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
7728         const PetscScalar *vals;
7729         const PetscInt    *cols, *garray = aij->garray;
7730         PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
7731         for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
7732           PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
7733           for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
7734             AA[k/bs] = 0;
7735             AJ[cidx] = garray[cols[k]]/bs;
7736           }
7737           nc = ncols/bs;
7738           PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
7739           for (int ii=0; ii<bs; ii++) { // rows in block
7740             PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
7741             for (int k=0; k<ncols; k += bs) {
7742               for (int jj=0; jj<bs; jj++) { // cols in block
7743                 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
7744               }
7745             }
7746             PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
7747           }
7748           grow = Istart/bs + brow/bs;
7749           PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
7750         }
7751       }
7752       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7753       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7754     } else {
7755       const PetscScalar *vals;
7756       const PetscInt    *idx;
7757       PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
7758       old_bs:
7759       /*
7760        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7761        */
7762       PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
7763       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7764       if (isseqaij) {
7765         PetscInt max_d_nnz;
7766         /*
7767          Determine exact preallocation count for (sequential) scalar matrix
7768          */
7769         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7770         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7771         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7772         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7773           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7774         }
7775         PetscCall(PetscFree3(w0,w1,w2));
7776       } else if (ismpiaij) {
7777         Mat            Daij,Oaij;
7778         const PetscInt *garray;
7779         PetscInt       max_d_nnz;
7780         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7781         /*
7782          Determine exact preallocation count for diagonal block portion of scalar matrix
7783          */
7784         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7785         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7786         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7787         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7788           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7789         }
7790         PetscCall(PetscFree3(w0,w1,w2));
7791         /*
7792          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7793          */
7794         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7795           o_nnz[jj] = 0;
7796           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7797             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7798             o_nnz[jj] += ncols;
7799             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7800           }
7801           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7802         }
7803       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7804       /* get scalar copy (norms) of matrix */
7805       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7806       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7807       PetscCall(PetscFree2(d_nnz,o_nnz));
7808       for (Ii = Istart; Ii < Iend; Ii++) {
7809         PetscInt dest_row = Ii/bs;
7810         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7811         for (jj=0; jj<ncols; jj++) {
7812           PetscInt    dest_col = idx[jj]/bs;
7813           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7814           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7815         }
7816         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7817       }
7818       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7819       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7820     }
7821   } else {
7822     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7823      procedure via MatAbs API */
7824     /* just copy scalar matrix & abs() */
7825     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7826     if (isseqaij) { a = Gmat; b = NULL; }
7827     else {
7828       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7829       a = d->A; b = d->B;
7830     }
7831     /* abs */
7832     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7833       MatInfo     info;
7834       PetscScalar *avals;
7835       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7836       PetscCall(MatSeqAIJGetArray(c,&avals));
7837       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7838       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7839     }
7840   }
7841   if (symmetrize) {
7842     PetscBool issym;
7843     PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
7844     if (!issym) {
7845       Mat matTrans;
7846       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7847       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7848       PetscCall(MatDestroy(&matTrans));
7849     }
7850     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7851   } else {
7852     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7853   }
7854   if (scale) {
7855     /* symmetrically scale Gmat so that all diagonal values become +1 or -1 */
7856     Vec               diag;
7857     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7858     PetscCall(MatGetDiagonal(Gmat, diag));
7859     PetscCall(VecReciprocal(diag));
7860     PetscCall(VecSqrtAbs(diag));
7861     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7862     PetscCall(VecDestroy(&diag));
7863   }
7864   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7865   *a_Gmat = Gmat;
7866   PetscFunctionReturn(0);
7867 }
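/* A minimal internal-usage sketch (hypothetical caller; the symmetrize/scale flags mirror what an
   algebraic multigrid setup might request):

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, &G)); // symmetrize and scale
     ... coarsen or partition using the scalar graph G ...
     PetscCall(MatDestroy(&G));
*/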
7868 
7869 /* -------------------------------------------------------------------------- */
7870 /*@C
7871    MatFilter_AIJ - filter out values with small absolute value
7872      If vfilter < 0 this routine does nothing and should not be called.
7873 
7874    Collective on Mat
7875 
7876    Input Parameters:
7877 +   Gmat - the graph
7878 -   vfilter - threshold parameter in [0,1)
7879 
7880    Output Parameter:
7881 .   filteredG - the filtered scalar graph
7882 
7883    Level: developer
7884 
7885    Notes:
7886     This is called before the graph coarseners are called.
7887     This could be moved into Mat; the 'symm' handling could be moved to GAMG
7888 
7889 .seealso: `PCGAMGSetThreshold()`
7890 @*/
7891 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7892 {
7893   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7894   Mat               tGmat;
7895   MPI_Comm          comm;
7896   const PetscScalar *vals;
7897   const PetscInt    *idx;
7898   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7899   MatScalar         *AA; // this is checked in graph
7900   PetscBool         isseqaij;
7901   Mat               a, b, c;
7902   MatType           jtype;
7903 
7904   PetscFunctionBegin;
7905   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7906   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7907   PetscCall(MatGetType(Gmat,&jtype));
7908   PetscCall(MatCreate(comm, &tGmat));
7909   PetscCall(MatSetType(tGmat, jtype));
7910 
7911   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7912                Also, if the matrix is symmetric, can we skip this
7913                operation? It can be very expensive on large matrices. */
7914 
7915   // global sizes
7916   PetscCall(MatGetSize(Gmat, &MM, &NN));
7917   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7918   nloc = Iend - Istart;
7919   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7920   if (isseqaij) { a = Gmat; b = NULL; }
7921   else {
7922     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7923     a = d->A; b = d->B;
7924     garray = d->garray;
7925   }
7926   /* Determine upper bound on non-zeros needed in new filtered matrix */
7927   for (PetscInt row=0; row < nloc; row++) {
7928     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7929     d_nnz[row] = ncols;
7930     if (ncols>maxcols) maxcols=ncols;
7931     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7932   }
7933   if (b) {
7934     for (PetscInt row=0; row < nloc; row++) {
7935       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7936       o_nnz[row] = ncols;
7937       if (ncols>maxcols) maxcols=ncols;
7938       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7939     }
7940   }
7941   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7942   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7943   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7944   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7945   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7946   PetscCall(PetscFree2(d_nnz,o_nnz));
7947   //
7948   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7949   nnz0 = nnz1 = 0;
7950   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7951     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7952       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7953       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7954         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7955         if (PetscRealPart(sv) > vfilter) {
7956           nnz1++;
7957           PetscInt cid = idx[jj] + Istart; //diag
7958           if (c!=a) cid = garray[idx[jj]];
7959           AA[ncol_row] = vals[jj];
7960           AJ[ncol_row] = cid;
7961           ncol_row++;
7962         }
7963       }
7964       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7965       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7966     }
7967   }
7968   PetscCall(PetscFree2(AA,AJ));
7969   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7970   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7971   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7972 
7973   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7974                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7975                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7976 
7977   *filteredG = tGmat;
7978   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7979   PetscFunctionReturn(0);
7980 }
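/* A minimal usage sketch (hypothetical caller; the threshold value is illustrative, cf.
   PCGAMGSetThreshold()):

     Mat Gfilt;
     PetscCall(MatFilter_AIJ(G, 0.01, &Gfilt)); // keep only entries with |value| > 0.01
     PetscCall(MatDestroy(&G));
     G = Gfilt;
*/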
7981 
7982 /*
7983     Special version for direct calls from Fortran
7984 */
7985 #include <petsc/private/fortranimpl.h>
7986 
7987 /* Change these macros so can be used in void function */
7988 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7989 #undef  PetscCall
7990 #define PetscCall(...) do {                                                                    \
7991     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7992     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7993       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7994       return;                                                                                  \
7995     }                                                                                          \
7996   } while (0)
7997 
7998 #undef SETERRQ
7999 #define SETERRQ(comm,ierr,...) do {                                                            \
8000     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
8001     return;                                                                                    \
8002   } while (0)
8003 
8004 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8005 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8006 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8007 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8008 #else
8009 #endif
8010 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
8011 {
8012   Mat          mat  = *mmat;
8013   PetscInt     m    = *mm, n = *mn;
8014   InsertMode   addv = *maddv;
8015   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
8016   PetscScalar  value;
8017 
8018   MatCheckPreallocated(mat,1);
8019   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8020   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
8021   {
8022     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
8023     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
8024     PetscBool roworiented = aij->roworiented;
8025 
8026     /* Some Variables required in the macro */
8027     Mat        A                    = aij->A;
8028     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
8029     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
8030     MatScalar  *aa;
8031     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8032     Mat        B                    = aij->B;
8033     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
8034     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
8035     MatScalar  *ba;
8036     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8037      * cannot use "#if defined" inside a macro. */
8038     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8039 
8040     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
8041     PetscInt  nonew = a->nonew;
8042     MatScalar *ap1,*ap2;
8043 
8044     PetscFunctionBegin;
8045     PetscCall(MatSeqAIJGetArray(A,&aa));
8046     PetscCall(MatSeqAIJGetArray(B,&ba));
8047     for (i=0; i<m; i++) {
8048       if (im[i] < 0) continue;
8049       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
8050       if (im[i] >= rstart && im[i] < rend) {
8051         row      = im[i] - rstart;
8052         lastcol1 = -1;
8053         rp1      = aj + ai[row];
8054         ap1      = aa + ai[row];
8055         rmax1    = aimax[row];
8056         nrow1    = ailen[row];
8057         low1     = 0;
8058         high1    = nrow1;
8059         lastcol2 = -1;
8060         rp2      = bj + bi[row];
8061         ap2      = ba + bi[row];
8062         rmax2    = bimax[row];
8063         nrow2    = bilen[row];
8064         low2     = 0;
8065         high2    = nrow2;
8066 
8067         for (j=0; j<n; j++) {
8068           if (roworiented) value = v[i*n+j];
8069           else value = v[i+j*m];
8070           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8071           if (in[j] >= cstart && in[j] < cend) {
8072             col = in[j] - cstart;
8073             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
8074           } else if (in[j] < 0) continue;
8075           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8076             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
8077             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
8078           } else {
8079             if (mat->was_assembled) {
8080               if (!aij->colmap) {
8081                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8082               }
8083 #if defined(PETSC_USE_CTABLE)
8084               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
8085               col--;
8086 #else
8087               col = aij->colmap[in[j]] - 1;
8088 #endif
8089               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
8090                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8091                 col  =  in[j];
8092                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8093                 B        = aij->B;
8094                 b        = (Mat_SeqAIJ*)B->data;
8095                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
8096                 rp2      = bj + bi[row];
8097                 ap2      = ba + bi[row];
8098                 rmax2    = bimax[row];
8099                 nrow2    = bilen[row];
8100                 low2     = 0;
8101                 high2    = nrow2;
8102                 bm       = aij->B->rmap->n;
8103                 ba       = b->a;
8104                 inserted = PETSC_FALSE;
8105               }
8106             } else col = in[j];
8107             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
8108           }
8109         }
8110       } else if (!aij->donotstash) {
8111         if (roworiented) {
8112           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8113         } else {
8114           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8115         }
8116       }
8117     }
8118     PetscCall(MatSeqAIJRestoreArray(A,&aa));
8119     PetscCall(MatSeqAIJRestoreArray(B,&ba));
8120   }
8121   PetscFunctionReturnVoid();
8122 }
8123 
8124 /* Undefining these here since they were redefined from their original definition above! No
8125  * other PETSc functions should be defined past this point, as it is impossible to recover the
8126  * original definitions */
8127 #undef PetscCall
8128 #undef SETERRQ
8129