xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 20ce5b998951c35d19eb3e114ee947b4f05dcd58)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
10 {
11   Mat            B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
15   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
16   PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
17   PetscFunctionReturn(0);
18 }
19 
20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
21 {
22   Mat            B;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
26   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
27   PetscCall(MatDestroy(&B));
28   PetscFunctionReturn(0);
29 }
30 
31 /*MC
32    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
35    and MATMPIAIJ otherwise.  As a result, for single process communicators,
36   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
42 
43   Developer Notes:
44     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The matrix also automatically switches over to
45     using I-node routines when enough inodes exist.
46 
47   Level: beginner
48 
49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
50 M*/
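/*
   A minimal usage sketch, not part of this translation unit (the matrix sizes and preallocation
   values below are illustrative only). It shows the recommendation above of calling both
   preallocation routines, so the same code works on one process and on many:

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100));
     PetscCall(MatSetType(A,MATAIJ));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatSeqAIJSetPreallocation(A,5,NULL));           used when the communicator has one process
     PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,2,NULL));    used when the communicator has multiple processes
     ... set values, MatAssemblyBegin()/MatAssemblyEnd(), use the matrix ...
     PetscCall(MatDestroy(&A));
*/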
51 
52 /*MC
53    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
54 
55    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
56    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
57    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
58   for communicators controlling multiple processes.  It is recommended that you call both of
59   the above preallocation routines for simplicity.
60 
61    Options Database Keys:
62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
63 
64   Level: beginner
65 
66 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
67 M*/
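/*
   A minimal sketch of selecting this format at run time (the program name is hypothetical): any code
   path that calls MatSetFromOptions() on the matrix picks up the options database key documented above,

     ./app -mat_type aijcrl

   which is equivalent to calling PetscCall(MatSetType(A,MATAIJCRL)) directly in the code.
*/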
68 
69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
70 {
71   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
72 
73   PetscFunctionBegin;
74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
75   A->boundtocpu = flg;
76 #endif
77   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
78   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
79 
80   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
81    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
82    * to differ from that of the parent matrix. */
83   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
84   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
85 
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
90 {
91   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
92 
93   PetscFunctionBegin;
94   if (mat->A) {
95     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
96     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
102 {
103   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
104   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
105   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
106   const PetscInt  *ia,*ib;
107   const MatScalar *aa,*bb,*aav,*bav;
108   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
109   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
110 
111   PetscFunctionBegin;
112   *keptrows = NULL;
113 
114   ia   = a->i;
115   ib   = b->i;
116   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
117   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
118   for (i=0; i<m; i++) {
119     na = ia[i+1] - ia[i];
120     nb = ib[i+1] - ib[i];
121     if (!na && !nb) {
122       cnt++;
123       goto ok1;
124     }
125     aa = aav + ia[i];
126     for (j=0; j<na; j++) {
127       if (aa[j] != 0.0) goto ok1;
128     }
129     bb = bav + ib[i];
130     for (j=0; j <nb; j++) {
131       if (bb[j] != 0.0) goto ok1;
132     }
133     cnt++;
134 ok1:;
135   }
136   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
137   if (!n0rows) {
138     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
140     PetscFunctionReturn(0);
141   }
142   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
143   cnt  = 0;
144   for (i=0; i<m; i++) {
145     na = ia[i+1] - ia[i];
146     nb = ib[i+1] - ib[i];
147     if (!na && !nb) continue;
148     aa = aav + ia[i];
149     for (j=0; j<na;j++) {
150       if (aa[j] != 0.0) {
151         rows[cnt++] = rstart + i;
152         goto ok2;
153       }
154     }
155     bb = bav + ib[i];
156     for (j=0; j<nb; j++) {
157       if (bb[j] != 0.0) {
158         rows[cnt++] = rstart + i;
159         goto ok2;
160       }
161     }
162 ok2:;
163   }
164   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
165   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
167   PetscFunctionReturn(0);
168 }
169 
170 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
171 {
172   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
173   PetscBool         cong;
174 
175   PetscFunctionBegin;
176   PetscCall(MatHasCongruentLayouts(Y,&cong));
177   if (Y->assembled && cong) {
178     PetscCall(MatDiagonalSet(aij->A,D,is));
179   } else {
180     PetscCall(MatDiagonalSet_Default(Y,D,is));
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
188   PetscInt       i,rstart,nrows,*rows;
189 
190   PetscFunctionBegin;
191   *zrows = NULL;
192   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
193   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
194   for (i=0; i<nrows; i++) rows[i] += rstart;
195   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
200 {
201   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
202   PetscInt          i,m,n,*garray = aij->garray;
203   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
204   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
205   PetscReal         *work;
206   const PetscScalar *dummy;
207 
208   PetscFunctionBegin;
209   PetscCall(MatGetSize(A,&m,&n));
210   PetscCall(PetscCalloc1(n,&work));
211   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
212   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
213   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
214   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
215   if (type == NORM_2) {
216     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
217       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
218     }
219     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
220       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
221     }
222   } else if (type == NORM_1) {
223     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
224       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
225     }
226     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
227       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
228     }
229   } else if (type == NORM_INFINITY) {
230     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
231       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
232     }
233     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
234       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
235     }
236   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
237     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
238       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
239     }
240     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
241       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
242     }
243   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
244     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
245       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
246     }
247     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
248       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
249     }
250   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
251   if (type == NORM_INFINITY) {
252     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
253   } else {
254     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
255   }
256   PetscCall(PetscFree(work));
257   if (type == NORM_2) {
258     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
259   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
260     for (i=0; i<n; i++) reductions[i] /= m;
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
266 {
267   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
268   IS              sis,gis;
269   const PetscInt  *isis,*igis;
270   PetscInt        n,*iis,nsis,ngis,rstart,i;
271 
272   PetscFunctionBegin;
273   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
274   PetscCall(MatFindNonzeroRows(a->B,&gis));
275   PetscCall(ISGetSize(gis,&ngis));
276   PetscCall(ISGetSize(sis,&nsis));
277   PetscCall(ISGetIndices(sis,&isis));
278   PetscCall(ISGetIndices(gis,&igis));
279 
280   PetscCall(PetscMalloc1(ngis+nsis,&iis));
281   PetscCall(PetscArraycpy(iis,igis,ngis));
282   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
283   n    = ngis + nsis;
284   PetscCall(PetscSortRemoveDupsInt(&n,iis));
285   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
286   for (i=0; i<n; i++) iis[i] += rstart;
287   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
288 
289   PetscCall(ISRestoreIndices(sis,&isis));
290   PetscCall(ISRestoreIndices(gis,&igis));
291   PetscCall(ISDestroy(&sis));
292   PetscCall(ISDestroy(&gis));
293   PetscFunctionReturn(0);
294 }
295 
296 /*
297   Local utility routine that creates a mapping from the global column
298 number to the local number in the off-diagonal part of the local
299 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
300 a slightly higher hash table cost; without it, it is not scalable (each process
301 stores an order-N integer array, which is, however, fast to access).
302 */
303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscInt       n = aij->B->cmap->n,i;
307 
308   PetscFunctionBegin;
309   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
310 #if defined(PETSC_USE_CTABLE)
311   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
312   for (i=0; i<n; i++) {
313     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
314   }
315 #else
316   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
317   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
318   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
319 #endif
320   PetscFunctionReturn(0);
321 }
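/*
   A small lookup sketch (assuming an assembled MPIAIJ matrix 'mat' and a hypothetical global column
   index 'gcol' held off-process), mirroring how the colmap built above is consulted later in this file
   (see MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()); 'lcol' ends up as -1 when 'gcol' is absent:

     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
     PetscInt    lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscTableFind(aij->colmap,gcol+1,&lcol));
     lcol--;                          keys/values are stored shifted by one, so 0 means "not found"
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/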
322 
323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
324 { \
325     if (col <= lastcol1)  low1 = 0;     \
326     else                 high1 = nrow1; \
327     lastcol1 = col;\
328     while (high1-low1 > 5) { \
329       t = (low1+high1)/2; \
330       if (rp1[t] > col) high1 = t; \
331       else              low1  = t; \
332     } \
333       for (_i=low1; _i<high1; _i++) { \
334         if (rp1[_i] > col) break; \
335         if (rp1[_i] == col) { \
336           if (addv == ADD_VALUES) { \
337             ap1[_i] += value;   \
338             /* Not sure whether LogFlops will slow down the code or not */ \
339             (void)PetscLogFlops(1.0);   \
340            } \
341           else                    ap1[_i] = value; \
342           goto a_noinsert; \
343         } \
344       }  \
345       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
346       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
347       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
348       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
349       N = nrow1++ - 1; a->nz++; high1++; \
350       /* shift up all the later entries in this row */ \
351       PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
352       PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
353       rp1[_i] = col;  \
354       ap1[_i] = value;  \
355       A->nonzerostate++;\
356       a_noinsert: ; \
357       ailen[row] = nrow1; \
358 }
359 
360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
361   { \
362     if (col <= lastcol2) low2 = 0;                        \
363     else high2 = nrow2;                                   \
364     lastcol2 = col;                                       \
365     while (high2-low2 > 5) {                              \
366       t = (low2+high2)/2;                                 \
367       if (rp2[t] > col) high2 = t;                        \
368       else             low2  = t;                         \
369     }                                                     \
370     for (_i=low2; _i<high2; _i++) {                       \
371       if (rp2[_i] > col) break;                           \
372       if (rp2[_i] == col) {                               \
373         if (addv == ADD_VALUES) {                         \
374           ap2[_i] += value;                               \
375           (void)PetscLogFlops(1.0);                       \
376         }                                                 \
377         else                    ap2[_i] = value;          \
378         goto b_noinsert;                                  \
379       }                                                   \
380     }                                                     \
381     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
382     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
383     PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
384     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
385     N = nrow2++ - 1; b->nz++; high2++;                    \
386     /* shift up all the later entries in this row */      \
387     PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
388     PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
389     rp2[_i] = col;                                        \
390     ap2[_i] = value;                                      \
391     B->nonzerostate++;                                    \
392     b_noinsert: ;                                         \
393     bilen[row] = nrow2;                                   \
394   }
395 
396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
397 {
398   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
399   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
400   PetscInt       l,*garray = mat->garray,diag;
401   PetscScalar    *aa,*ba;
402 
403   PetscFunctionBegin;
404   /* code only works for square matrices A */
405 
406   /* find size of row to the left of the diagonal part */
407   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
408   row  = row - diag;
409   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
410     if (garray[b->j[b->i[row]+l]] > diag) break;
411   }
412   if (l) {
413     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
414     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
415     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
416   }
417 
418   /* diagonal part */
419   if (a->i[row+1]-a->i[row]) {
420     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
421     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
422     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
423   }
424 
425   /* right of diagonal part */
426   if (b->i[row+1]-b->i[row]-l) {
427     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
428     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
429     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
435 {
436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
437   PetscScalar    value = 0.0;
438   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
439   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
440   PetscBool      roworiented = aij->roworiented;
441 
442   /* Some Variables required in the macro */
443   Mat        A                    = aij->A;
444   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
445   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
446   PetscBool  ignorezeroentries    = a->ignorezeroentries;
447   Mat        B                    = aij->B;
448   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
449   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
450   MatScalar  *aa,*ba;
451   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
452   PetscInt   nonew;
453   MatScalar  *ap1,*ap2;
454 
455   PetscFunctionBegin;
456   PetscCall(MatSeqAIJGetArray(A,&aa));
457   PetscCall(MatSeqAIJGetArray(B,&ba));
458   for (i=0; i<m; i++) {
459     if (im[i] < 0) continue;
460     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
461     if (im[i] >= rstart && im[i] < rend) {
462       row      = im[i] - rstart;
463       lastcol1 = -1;
464       rp1      = aj + ai[row];
465       ap1      = aa + ai[row];
466       rmax1    = aimax[row];
467       nrow1    = ailen[row];
468       low1     = 0;
469       high1    = nrow1;
470       lastcol2 = -1;
471       rp2      = bj + bi[row];
472       ap2      = ba + bi[row];
473       rmax2    = bimax[row];
474       nrow2    = bilen[row];
475       low2     = 0;
476       high2    = nrow2;
477 
478       for (j=0; j<n; j++) {
479         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
480         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
481         if (in[j] >= cstart && in[j] < cend) {
482           col   = in[j] - cstart;
483           nonew = a->nonew;
484           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
485         } else if (in[j] < 0) {
486           continue;
487         } else {
488           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
489           if (mat->was_assembled) {
490             if (!aij->colmap) {
491               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
492             }
493 #if defined(PETSC_USE_CTABLE)
494             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
495             col--;
496 #else
497             col = aij->colmap[in[j]] - 1;
498 #endif
499             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
500               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
501               col  =  in[j];
502               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
503               B        = aij->B;
504               b        = (Mat_SeqAIJ*)B->data;
505               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
506               rp2      = bj + bi[row];
507               ap2      = ba + bi[row];
508               rmax2    = bimax[row];
509               nrow2    = bilen[row];
510               low2     = 0;
511               high2    = nrow2;
512               bm       = aij->B->rmap->n;
513               ba       = b->a;
514             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
515               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
516                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
517               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
518             }
519           } else col = in[j];
520           nonew = b->nonew;
521           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
522         }
523       }
524     } else {
525       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
526       if (!aij->donotstash) {
527         mat->assembled = PETSC_FALSE;
528         if (roworiented) {
529           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
530         } else {
531           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
532         }
533       }
534     }
535   }
536   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
537   PetscCall(MatSeqAIJRestoreArray(B,&ba));
538   PetscFunctionReturn(0);
539 }
540 
541 /*
542     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
543     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
544     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
545 */
546 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
547 {
548   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
549   Mat            A           = aij->A; /* diagonal part of the matrix */
550   Mat            B           = aij->B; /* offdiagonal part of the matrix */
551   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
552   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
554   PetscInt       *ailen      = a->ilen,*aj = a->j;
555   PetscInt       *bilen      = b->ilen,*bj = b->j;
556   PetscInt       am          = aij->A->rmap->n,j;
557   PetscInt       diag_so_far = 0,dnz;
558   PetscInt       offd_so_far = 0,onz;
559 
560   PetscFunctionBegin;
561   /* Iterate over all rows of the matrix */
562   for (j=0; j<am; j++) {
563     dnz = onz = 0;
564     /*  Iterate over all non-zero columns of the current row */
565     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
566       /* If column is in the diagonal */
567       if (mat_j[col] >= cstart && mat_j[col] < cend) {
568         aj[diag_so_far++] = mat_j[col] - cstart;
569         dnz++;
570       } else { /* off-diagonal entries */
571         bj[offd_so_far++] = mat_j[col];
572         onz++;
573       }
574     }
575     ailen[j] = dnz;
576     bilen[j] = onz;
577   }
578   PetscFunctionReturn(0);
579 }
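/*
   A small worked example with hypothetical data: on a rank owning columns [cstart,cend) = [4,8), a
   local row with column indices mat_j = {1, 5, 6, 9} is split by the routine above as

     diagonal block:     aj gets {1, 2}   (5-4 and 6-4, shifted to local indices),  ailen[row] = 2
     off-diagonal block: bj gets {1, 9}   (kept as global indices for now),         bilen[row] = 2
*/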
580 
581 /*
582     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
583     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
584     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
585     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
586     Also, mat->was_assembled has to be PETSC_FALSE; otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
587     would not be valid and the more general MatSetValues_MPIAIJ() has to be used.
588 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
589 {
590   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
591   Mat            A      = aij->A; /* diagonal part of the matrix */
592   Mat            B      = aij->B; /* offdiagonal part of the matrix */
593   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
594   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
595   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
597   PetscInt       *ailen = a->ilen,*aj = a->j;
598   PetscInt       *bilen = b->ilen,*bj = b->j;
599   PetscInt       am     = aij->A->rmap->n,j;
600   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
601   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
602   PetscScalar    *aa = a->a,*ba = b->a;
603 
604   PetscFunctionBegin;
605   /* Iterate over all rows of the matrix */
606   for (j=0; j<am; j++) {
607     dnz_row = onz_row = 0;
608     rowstart_offd = full_offd_i[j];
609     rowstart_diag = full_diag_i[j];
610     /*  Iterate over all non-zero columns of the current row */
611     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
612       /* If column is in the diagonal */
613       if (mat_j[col] >= cstart && mat_j[col] < cend) {
614         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
615         aa[rowstart_diag+dnz_row] = mat_a[col];
616         dnz_row++;
617       } else { /* off-diagonal entries */
618         bj[rowstart_offd+onz_row] = mat_j[col];
619         ba[rowstart_offd+onz_row] = mat_a[col];
620         onz_row++;
621       }
622     }
623     ailen[j] = dnz_row;
624     bilen[j] = onz_row;
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* negative row */
638     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* negative column */
643         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
647         } else {
648           if (!aij->colmap) {
649             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
650           }
651 #if defined(PETSC_USE_CTABLE)
652           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   PetscInt       nstash,reallocs;
672 
673   PetscFunctionBegin;
674   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
677   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
678   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
679   PetscFunctionReturn(0);
680 }
681 
682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscMPIInt    n;
686   PetscInt       i,j,rstart,ncols,flg;
687   PetscInt       *row,*col;
688   PetscBool      other_disassembled;
689   PetscScalar    *val;
690 
691   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
692 
693   PetscFunctionBegin;
694   if (!aij->donotstash && !mat->nooffprocentries) {
695     while (1) {
696       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
697       if (!flg) break;
698 
699       for (i=0; i<n;) {
700         /* Now identify the consecutive vals belonging to the same row */
701         for (j=i,rstart=row[j]; j<n; j++) {
702           if (row[j] != rstart) break;
703         }
704         if (j < n) ncols = j-i;
705         else       ncols = n-i;
706         /* Now assemble all these values with a single function call */
707         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
708         i    = j;
709       }
710     }
711     PetscCall(MatStashScatterEnd_Private(&mat->stash));
712   }
713 #if defined(PETSC_HAVE_DEVICE)
714   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
715   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
716   if (mat->boundtocpu) {
717     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
718     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
719   }
720 #endif
721   PetscCall(MatAssemblyBegin(aij->A,mode));
722   PetscCall(MatAssemblyEnd(aij->A,mode));
723 
724   /* determine if any process has disassembled; if so, we must
725      also disassemble ourselves, in order that we may reassemble. */
726   /*
727      if nonzero structure of submatrix B cannot change then we know that
728      no processor disassembled thus we can skip this stuff
729   */
730   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
731     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
732     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has a reduced off-diag B with local col ids, but globally it does not */
733       PetscCall(MatDisAssemble_MPIAIJ(mat));
734     }
735   }
736   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
737     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
738   }
739   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
740 #if defined(PETSC_HAVE_DEVICE)
741   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
742 #endif
743   PetscCall(MatAssemblyBegin(aij->B,mode));
744   PetscCall(MatAssemblyEnd(aij->B,mode));
745 
746   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
747 
748   aij->rowvalues = NULL;
749 
750   PetscCall(VecDestroy(&aij->diag));
751 
752   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
753   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
754     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
755     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
756   }
757 #if defined(PETSC_HAVE_DEVICE)
758   mat->offloadmask = PETSC_OFFLOAD_BOTH;
759 #endif
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
764 {
765   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
766 
767   PetscFunctionBegin;
768   PetscCall(MatZeroEntries(l->A));
769   PetscCall(MatZeroEntries(l->B));
770   PetscFunctionReturn(0);
771 }
772 
773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
774 {
775   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
776   PetscObjectState sA, sB;
777   PetscInt        *lrows;
778   PetscInt         r, len;
779   PetscBool        cong, lch, gch;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
784   PetscCall(MatHasCongruentLayouts(A,&cong));
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     PetscCall(VecGetArrayRead(x, &xx));
792     PetscCall(VecGetArray(b, &bb));
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     PetscCall(VecRestoreArrayRead(x, &xx));
795     PetscCall(VecRestoreArray(b, &bb));
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
803     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
804   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
824     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
834     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
835   }
836   PetscCall(PetscFree(lrows));
837   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
838   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscMPIInt       n = A->rmap->n;
851   PetscInt          i,j,r,m,len = 0;
852   PetscInt          *lrows,*owners = A->rmap->range;
853   PetscMPIInt       p = 0;
854   PetscSFNode       *rrows;
855   PetscSF           sf;
856   const PetscScalar *xx;
857   PetscScalar       *bb,*mask,*aij_a;
858   Vec               xmask,lmask;
859   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
860   const PetscInt    *aj, *ii,*ridx;
861   PetscScalar       *aa;
862 
863   PetscFunctionBegin;
864   /* Create SF where leaves are input rows and roots are owned rows */
865   PetscCall(PetscMalloc1(n, &lrows));
866   for (r = 0; r < n; ++r) lrows[r] = -1;
867   PetscCall(PetscMalloc1(N, &rrows));
868   for (r = 0; r < N; ++r) {
869     const PetscInt idx   = rows[r];
870     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
871     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
872       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
873     }
874     rrows[r].rank  = p;
875     rrows[r].index = rows[r] - owners[p];
876   }
877   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
878   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
879   /* Collect flags for rows to be zeroed */
880   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
881   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
882   PetscCall(PetscSFDestroy(&sf));
883   /* Compress and put in row numbers */
884   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
885   /* zero diagonal part of matrix */
886   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
887   /* handle off diagonal part of matrix */
888   PetscCall(MatCreateVecs(A,&xmask,NULL));
889   PetscCall(VecDuplicate(l->lvec,&lmask));
890   PetscCall(VecGetArray(xmask,&bb));
891   for (i=0; i<len; i++) bb[lrows[i]] = 1;
892   PetscCall(VecRestoreArray(xmask,&bb));
893   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
894   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
895   PetscCall(VecDestroy(&xmask));
896   if (x && b) { /* this code is buggy when the row and column layout don't match */
897     PetscBool cong;
898 
899     PetscCall(MatHasCongruentLayouts(A,&cong));
900     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
901     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
902     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
903     PetscCall(VecGetArrayRead(l->lvec,&xx));
904     PetscCall(VecGetArray(b,&bb));
905   }
906   PetscCall(VecGetArray(lmask,&mask));
907   /* remove zeroed rows of off diagonal matrix */
908   PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
909   ii = aij->i;
910   for (i=0; i<len; i++) {
911     PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
912   }
913   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
914   if (aij->compressedrow.use) {
915     m    = aij->compressedrow.nrows;
916     ii   = aij->compressedrow.i;
917     ridx = aij->compressedrow.rindex;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij_a + ii[i];
922 
923       for (j=0; j<n; j++) {
924         if (PetscAbsScalar(mask[*aj])) {
925           if (b) bb[*ridx] -= *aa*xx[*aj];
926           *aa = 0.0;
927         }
928         aa++;
929         aj++;
930       }
931       ridx++;
932     }
933   } else { /* do not use compressed row format */
934     m = l->B->rmap->n;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij_a + ii[i];
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[i] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947     }
948   }
949   if (x && b) {
950     PetscCall(VecRestoreArray(b,&bb));
951     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
952   }
953   PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
954   PetscCall(VecRestoreArray(lmask,&mask));
955   PetscCall(VecDestroy(&lmask));
956   PetscCall(PetscFree(lrows));
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscInt       nt;
970   VecScatter     Mvctx = a->Mvctx;
971 
972   PetscFunctionBegin;
973   PetscCall(VecGetLocalSize(xx,&nt));
974   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
975   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
976   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
977   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
978   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985 
986   PetscFunctionBegin;
987   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
992 {
993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
994   VecScatter     Mvctx = a->Mvctx;
995 
996   PetscFunctionBegin;
997   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
998   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
999   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1000   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007 
1008   PetscFunctionBegin;
1009   /* do nondiagonal part */
1010   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1011   /* do local part */
1012   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1013   /* add partial results together */
1014   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1015   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1020 {
1021   MPI_Comm       comm;
1022   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1023   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1024   IS             Me,Notme;
1025   PetscInt       M,N,first,last,*notme,i;
1026   PetscBool      lf;
1027   PetscMPIInt    size;
1028 
1029   PetscFunctionBegin;
1030   /* Easy test: symmetric diagonal block */
1031   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1032   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
1033   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1034   if (!*f) PetscFunctionReturn(0);
1035   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1036   PetscCallMPI(MPI_Comm_size(comm,&size));
1037   if (size == 1) PetscFunctionReturn(0);
1038 
1039   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrices() call. */
1040   PetscCall(MatGetSize(Amat,&M,&N));
1041   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
1042   PetscCall(PetscMalloc1(N-last+first,&notme));
1043   for (i=0; i<first; i++) notme[i] = i;
1044   for (i=last; i<M; i++) notme[i-last+first] = i;
1045   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
1046   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
1047   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
1048   Aoff = Aoffs[0];
1049   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
1050   Boff = Boffs[0];
1051   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
1052   PetscCall(MatDestroyMatrices(1,&Aoffs));
1053   PetscCall(MatDestroyMatrices(1,&Boffs));
1054   PetscCall(ISDestroy(&Me));
1055   PetscCall(ISDestroy(&Notme));
1056   PetscCall(PetscFree(notme));
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1061 {
1062   PetscFunctionBegin;
1063   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070 
1071   PetscFunctionBegin;
1072   /* do nondiagonal part */
1073   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1074   /* do local part */
1075   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1076   /* add partial results together */
1077   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1078   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089 
1090   PetscFunctionBegin;
1091   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1092   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1093   PetscCall(MatGetDiagonal(a->A,v));
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100 
1101   PetscFunctionBegin;
1102   PetscCall(MatScale(a->A,aa));
1103   PetscCall(MatScale(a->B,aa));
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1109 {
1110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1111 
1112   PetscFunctionBegin;
1113   PetscCall(PetscSFDestroy(&aij->coo_sf));
1114   PetscCall(PetscFree(aij->Aperm1));
1115   PetscCall(PetscFree(aij->Bperm1));
1116   PetscCall(PetscFree(aij->Ajmap1));
1117   PetscCall(PetscFree(aij->Bjmap1));
1118 
1119   PetscCall(PetscFree(aij->Aimap2));
1120   PetscCall(PetscFree(aij->Bimap2));
1121   PetscCall(PetscFree(aij->Aperm2));
1122   PetscCall(PetscFree(aij->Bperm2));
1123   PetscCall(PetscFree(aij->Ajmap2));
1124   PetscCall(PetscFree(aij->Bjmap2));
1125 
1126   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1127   PetscCall(PetscFree(aij->Cperm1));
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1132 {
1133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1134 
1135   PetscFunctionBegin;
1136 #if defined(PETSC_USE_LOG)
1137   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1138 #endif
1139   PetscCall(MatStashDestroy_Private(&mat->stash));
1140   PetscCall(VecDestroy(&aij->diag));
1141   PetscCall(MatDestroy(&aij->A));
1142   PetscCall(MatDestroy(&aij->B));
1143 #if defined(PETSC_USE_CTABLE)
1144   PetscCall(PetscTableDestroy(&aij->colmap));
1145 #else
1146   PetscCall(PetscFree(aij->colmap));
1147 #endif
1148   PetscCall(PetscFree(aij->garray));
1149   PetscCall(VecDestroy(&aij->lvec));
1150   PetscCall(VecScatterDestroy(&aij->Mvctx));
1151   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1152   PetscCall(PetscFree(aij->ld));
1153 
1154   /* Free COO */
1155   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1156 
1157   PetscCall(PetscFree(mat->data));
1158 
1159   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1160   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1161 
1162   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1172 #if defined(PETSC_HAVE_CUDA)
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1174 #endif
1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1177 #endif
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1181 #endif
1182 #if defined(PETSC_HAVE_SCALAPACK)
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1184 #endif
1185 #if defined(PETSC_HAVE_HYPRE)
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1188 #endif
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1195 #if defined(PETSC_HAVE_MKL_SPARSE)
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1197 #endif
1198   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1202   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1209   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1210   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa,*ba;
1213   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1214   PetscInt          *rowlens;
1215   PetscInt          *colidxs;
1216   PetscScalar       *matvals;
1217 
1218   PetscFunctionBegin;
1219   PetscCall(PetscViewerSetUp(viewer));
1220 
1221   M  = mat->rmap->N;
1222   N  = mat->cmap->N;
1223   m  = mat->rmap->n;
1224   rs = mat->rmap->rstart;
1225   cs = mat->cmap->rstart;
1226   nz = A->nz + B->nz;
1227 
1228   /* write matrix header */
1229   header[0] = MAT_FILE_CLASSID;
1230   header[1] = M; header[2] = N; header[3] = nz;
1231   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1232   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1233 
1234   /* fill in and store row lengths  */
1235   PetscCall(PetscMalloc1(m,&rowlens));
1236   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1238   PetscCall(PetscFree(rowlens));
1239 
1240   /* fill in and store column indices */
1241   PetscCall(PetscMalloc1(nz,&colidxs));
1242   for (cnt=0, i=0; i<m; i++) {
1243     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       colidxs[cnt++] = garray[B->j[jb]];
1246     }
1247     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1248       colidxs[cnt++] = A->j[ja] + cs;
1249     for (; jb<B->i[i+1]; jb++)
1250       colidxs[cnt++] = garray[B->j[jb]];
1251   }
1252   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1254   PetscCall(PetscFree(colidxs));
1255 
1256   /* fill in and store nonzero values */
1257   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1258   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1259   PetscCall(PetscMalloc1(nz,&matvals));
1260   for (cnt=0, i=0; i<m; i++) {
1261     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1262       if (garray[B->j[jb]] > cs) break;
1263       matvals[cnt++] = ba[jb];
1264     }
1265     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1266       matvals[cnt++] = aa[ja];
1267     for (; jb<B->i[i+1]; jb++)
1268       matvals[cnt++] = ba[jb];
1269   }
1270   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1272   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1273   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1274   PetscCall(PetscFree(matvals));
1275 
1276   /* write block size option to the viewer's .info file */
1277   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #include <petscdraw.h>
1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1283 {
1284   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1285   PetscMPIInt       rank = aij->rank,size = aij->size;
1286   PetscBool         isdraw,iascii,isbinary;
1287   PetscViewer       sviewer;
1288   PetscViewerFormat format;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1292   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1293   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1294   if (iascii) {
1295     PetscCall(PetscViewerGetFormat(viewer,&format));
1296     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1297       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1298       PetscCall(PetscMalloc1(size,&nz));
1299       PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
1300       for (i=0; i<(PetscInt)size; i++) {
1301         nmax = PetscMax(nmax,nz[i]);
1302         nmin = PetscMin(nmin,nz[i]);
1303         navg += nz[i];
1304       }
1305       PetscCall(PetscFree(nz));
1306       navg = navg/size;
1307       PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
1308       PetscFunctionReturn(0);
1309     }
1310     PetscCall(PetscViewerGetFormat(viewer,&format));
1311     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1312       MatInfo   info;
1313       PetscInt *inodes=NULL;
1314 
1315       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
1316       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
1317       PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
1318       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1319       if (!inodes) {
1320         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1321                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1322       } else {
1323         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1324                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1325       }
1326       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
1327       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1328       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
1329       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1330       PetscCall(PetscViewerFlush(viewer));
1331       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1332       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
1333       PetscCall(VecScatterView(aij->Mvctx,viewer));
1334       PetscFunctionReturn(0);
1335     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1336       PetscInt inodecount,inodelimit,*inodes;
1337       PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
1338       if (inodes) {
1339         PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1340       } else {
1341         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1342       }
1343       PetscFunctionReturn(0);
1344     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1345       PetscFunctionReturn(0);
1346     }
1347   } else if (isbinary) {
1348     if (size == 1) {
1349       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1350       PetscCall(MatView(aij->A,viewer));
1351     } else {
1352       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
1353     }
1354     PetscFunctionReturn(0);
1355   } else if (iascii && size == 1) {
1356     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1357     PetscCall(MatView(aij->A,viewer));
1358     PetscFunctionReturn(0);
1359   } else if (isdraw) {
1360     PetscDraw draw;
1361     PetscBool isnull;
1362     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1363     PetscCall(PetscDrawIsNull(draw,&isnull));
1364     if (isnull) PetscFunctionReturn(0);
1365   }
1366 
1367   { /* assemble the entire matrix onto first processor */
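         /* The stride index sets select all rows and columns on rank 0 and nothing elsewhere, so
            MatCreateSubMatrix() gathers a copy of the whole matrix onto rank 0; only that rank then
            views the sequential (diagonal-block) part Av through the sub-viewer. */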
1368     Mat A = NULL, Av;
1369     IS  isrow,iscol;
1370 
1371     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1372     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1373     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
1374     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
1375 /*  The commented code uses MatCreateSubMatrices instead */
1376 /*
1377     Mat *AA, A = NULL, Av;
1378     IS  isrow,iscol;
1379 
1380     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1381     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1382     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1383     if (rank == 0) {
1384        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1385        A    = AA[0];
1386        Av   = AA[0];
1387     }
1388     PetscCall(MatDestroySubMatrices(1,&AA));
1389 */
1390     PetscCall(ISDestroy(&iscol));
1391     PetscCall(ISDestroy(&isrow));
1392     /*
1393        Everyone has to call to draw the matrix since the graphics waits are
1394        synchronized across all processors that share the PetscDraw object
1395     */
1396     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1397     if (rank == 0) {
1398       if (((PetscObject)mat)->name) {
1399         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
1400       }
1401       PetscCall(MatView_SeqAIJ(Av,sviewer));
1402     }
1403     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1404     PetscCall(PetscViewerFlush(viewer));
1405     PetscCall(MatDestroy(&A));
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1411 {
1412   PetscBool      iascii,isdraw,issocket,isbinary;
1413 
1414   PetscFunctionBegin;
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1419   if (iascii || isdraw || isbinary || issocket) {
1420     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1426 {
1427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1428   Vec            bb1 = NULL;
1429   PetscBool      hasop;
1430 
1431   PetscFunctionBegin;
1432   if (flag == SOR_APPLY_UPPER) {
1433     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1434     PetscFunctionReturn(0);
1435   }
1436 
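       /* The work vector bb1 holds the locally modified right-hand side; it is needed unless a
          single iteration with a zero initial guess is requested, and always for the Eisenstat
          variant (note the bitwise ~flag test for the absence of SOR_ZERO_INITIAL_GUESS). */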
1437   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1438     PetscCall(VecDuplicate(bb,&bb1));
1439   }
1440 
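       /* Only processor-local relaxation is provided: each outer iteration scatters the current
          solution into the ghost vector lvec, moves the off-process coupling B*x onto the
          right-hand side (bb1 = bb - B*x), and then sweeps the local diagonal block A. */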
1441   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1444       its--;
1445     }
1446 
1447     while (its--) {
1448       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1449       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       PetscCall(VecScale(mat->lvec,-1.0));
1453       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1454 
1455       /* local sweep */
1456       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
1457     }
1458   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1461       its--;
1462     }
1463     while (its--) {
1464       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1465       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       PetscCall(VecScale(mat->lvec,-1.0));
1469       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1470 
1471       /* local sweep */
1472       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
1473     }
1474   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1477       its--;
1478     }
1479     while (its--) {
1480       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1481       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1482 
1483       /* update rhs: bb1 = bb - B*x */
1484       PetscCall(VecScale(mat->lvec,-1.0));
1485       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1486 
1487       /* local sweep */
1488       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
1489     }
1490   } else if (flag & SOR_EISENSTAT) {
1491     Vec xx1;
1492 
1493     PetscCall(VecDuplicate(bb,&xx1));
1494     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1495 
1496     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1497     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1498     if (!mat->diag) {
1499       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
1500       PetscCall(MatGetDiagonal(matin,mat->diag));
1501     }
1502     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1503     if (hasop) {
1504       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1505     } else {
1506       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1507     }
1508     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1509 
1510     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1511 
1512     /* local sweep */
1513     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
1514     PetscCall(VecAXPY(xx,1.0,xx1));
1515     PetscCall(VecDestroy(&xx1));
1516   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1517 
1518   PetscCall(VecDestroy(&bb1));
1519 
1520   matin->factorerrortype = mat->A->factorerrortype;
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1525 {
1526   Mat            aA,aB,Aperm;
1527   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1528   PetscScalar    *aa,*ba;
1529   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1530   PetscSF        rowsf,sf;
1531   IS             parcolp = NULL;
1532   PetscBool      done;
1533 
1534   PetscFunctionBegin;
1535   PetscCall(MatGetLocalSize(A,&m,&n));
1536   PetscCall(ISGetIndices(rowp,&rwant));
1537   PetscCall(ISGetIndices(colp,&cwant));
1538   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
1539 
1540   /* Invert row permutation to find out where my rows should go */
1541   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
1542   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
1543   PetscCall(PetscSFSetFromOptions(rowsf));
1544   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1545   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1546   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
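       /* After the reduce, rdest[i] holds the global row of the permuted matrix to which local row
          (rstart + i) of A is sent; the analogous reduce below fills cdest[] for the columns. */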
1547 
1548   /* Invert column permutation to find out where my columns should go */
1549   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1550   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
1551   PetscCall(PetscSFSetFromOptions(sf));
1552   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1553   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1554   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1555   PetscCall(PetscSFDestroy(&sf));
1556 
1557   PetscCall(ISRestoreIndices(rowp,&rwant));
1558   PetscCall(ISRestoreIndices(colp,&cwant));
1559   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
1560 
1561   /* Find out where my gcols should go */
1562   PetscCall(MatGetSize(aB,NULL,&ng));
1563   PetscCall(PetscMalloc1(ng,&gcdest));
1564   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1565   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
1566   PetscCall(PetscSFSetFromOptions(sf));
1567   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&sf));
1570 
1571   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
1572   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1573   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1574   for (i=0; i<m; i++) {
1575     PetscInt    row = rdest[i];
1576     PetscMPIInt rowner;
1577     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
1578     for (j=ai[i]; j<ai[i+1]; j++) {
1579       PetscInt    col = cdest[aj[j]];
1580       PetscMPIInt cowner;
1581       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
1582       if (rowner == cowner) dnnz[i]++;
1583       else onnz[i]++;
1584     }
1585     for (j=bi[i]; j<bi[i+1]; j++) {
1586       PetscInt    col = gcdest[bj[j]];
1587       PetscMPIInt cowner;
1588       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
1589       if (rowner == cowner) dnnz[i]++;
1590       else onnz[i]++;
1591     }
1592   }
1593   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1594   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1595   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1596   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1597   PetscCall(PetscSFDestroy(&rowsf));
1598 
1599   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
1600   PetscCall(MatSeqAIJGetArray(aA,&aa));
1601   PetscCall(MatSeqAIJGetArray(aB,&ba));
1602   for (i=0; i<m; i++) {
1603     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1604     PetscInt j0,rowlen;
1605     rowlen = ai[i+1] - ai[i];
1606     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert in batches of at most m */
1607       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1608       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1609     }
1610     rowlen = bi[i+1] - bi[i];
1611     for (j0=j=0; j<rowlen; j0=j) {
1612       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1613       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1614     }
1615   }
1616   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
1617   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
1618   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1619   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1620   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
1621   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
1622   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
1623   PetscCall(PetscFree3(work,rdest,cdest));
1624   PetscCall(PetscFree(gcdest));
1625   if (parcolp) PetscCall(ISDestroy(&colp));
1626   *B = Aperm;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1631 {
1632   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1633 
1634   PetscFunctionBegin;
1635   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1636   if (ghosts) *ghosts = aij->garray;
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1641 {
1642   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1643   Mat            A    = mat->A,B = mat->B;
1644   PetscLogDouble isend[5],irecv[5];
1645 
1646   PetscFunctionBegin;
1647   info->block_size = 1.0;
1648   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1649 
1650   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1651   isend[3] = info->memory;  isend[4] = info->mallocs;
1652 
1653   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1654 
1655   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;  isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687 {
1688   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1689 
1690   PetscFunctionBegin;
1691   switch (op) {
1692   case MAT_NEW_NONZERO_LOCATIONS:
1693   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695   case MAT_KEEP_NONZERO_PATTERN:
1696   case MAT_NEW_NONZERO_LOCATION_ERR:
1697   case MAT_USE_INODES:
1698   case MAT_IGNORE_ZERO_ENTRIES:
1699   case MAT_FORM_EXPLICIT_TRANSPOSE:
1700     MatCheckPreallocated(A,1);
1701     PetscCall(MatSetOption(a->A,op,flg));
1702     PetscCall(MatSetOption(a->B,op,flg));
1703     break;
1704   case MAT_ROW_ORIENTED:
1705     MatCheckPreallocated(A,1);
1706     a->roworiented = flg;
1707 
1708     PetscCall(MatSetOption(a->A,op,flg));
1709     PetscCall(MatSetOption(a->B,op,flg));
1710     break;
1711   case MAT_FORCE_DIAGONAL_ENTRIES:
1712   case MAT_SORTED_FULL:
1713     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1714     break;
1715   case MAT_IGNORE_OFF_PROC_ENTRIES:
1716     a->donotstash = flg;
1717     break;
1718   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1719   case MAT_SPD:
1720   case MAT_SYMMETRIC:
1721   case MAT_STRUCTURALLY_SYMMETRIC:
1722   case MAT_HERMITIAN:
1723   case MAT_SYMMETRY_ETERNAL:
1724     break;
1725   case MAT_SUBMAT_SINGLEIS:
1726     A->submat_singleis = flg;
1727     break;
1728   case MAT_STRUCTURE_ONLY:
1729     /* The option is handled directly by MatSetOption() */
1730     break;
1731   default:
1732     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1733   }
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1738 {
1739   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1740   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1741   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1742   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1743   PetscInt       *cmap,*idx_p;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1747   mat->getrowactive = PETSC_TRUE;
1748 
1749   if (!mat->rowvalues && (idx || v)) {
1750     /*
1751         allocate enough space to hold information from the longest row.
1752     */
1753     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1754     PetscInt   max = 1,tmp;
1755     for (i=0; i<matin->rmap->n; i++) {
1756       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1757       if (max < tmp) max = tmp;
1758     }
1759     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
1760   }
1761 
1762   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1763   lrow = row - rstart;
1764 
1765   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1766   if (!v)   {pvA = NULL; pvB = NULL;}
1767   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1768   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1769   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1770   nztot = nzA + nzB;
1771 
1772   cmap = mat->garray;
1773   if (v  || idx) {
1774     if (nztot) {
1775       /* Sort by increasing column numbers, assuming A and B already sorted */
1776       PetscInt imark = -1;
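           /* imark counts the off-diagonal (B) entries whose global column precedes the diagonal
              block; those come first, then all diagonal (A) entries, then the remaining B entries. */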
1777       if (v) {
1778         *v = v_p = mat->rowvalues;
1779         for (i=0; i<nzB; i++) {
1780           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1781           else break;
1782         }
1783         imark = i;
1784         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1785         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1786       }
1787       if (idx) {
1788         *idx = idx_p = mat->rowindices;
1789         if (imark > -1) {
1790           for (i=0; i<imark; i++) {
1791             idx_p[i] = cmap[cworkB[i]];
1792           }
1793         } else {
1794           for (i=0; i<nzB; i++) {
1795             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1796             else break;
1797           }
1798           imark = i;
1799         }
1800         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1801         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1802       }
1803     } else {
1804       if (idx) *idx = NULL;
1805       if (v)   *v   = NULL;
1806     }
1807   }
1808   *nz  = nztot;
1809   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1810   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1825 {
1826   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1827   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1828   PetscInt        i,j,cstart = mat->cmap->rstart;
1829   PetscReal       sum = 0.0;
1830   const MatScalar *v,*amata,*bmata;
1831 
1832   PetscFunctionBegin;
1833   if (aij->size == 1) {
1834     PetscCall(MatNorm(aij->A,type,norm));
1835   } else {
1836     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
1837     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
1838     if (type == NORM_FROBENIUS) {
1839       v = amata;
1840       for (i=0; i<amat->nz; i++) {
1841         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1842       }
1843       v = bmata;
1844       for (i=0; i<bmat->nz; i++) {
1845         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1846       }
1847       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1848       *norm = PetscSqrtReal(*norm);
1849       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
1850     } else if (type == NORM_1) { /* max column norm */
1851       PetscReal *tmp,*tmp2;
1852       PetscInt  *jj,*garray = aij->garray;
1853       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
1854       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
1855       *norm = 0.0;
1856       v     = amata; jj = amat->j;
1857       for (j=0; j<amat->nz; j++) {
1858         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1859       }
1860       v = bmata; jj = bmat->j;
1861       for (j=0; j<bmat->nz; j++) {
1862         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1863       }
1864       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1865       for (j=0; j<mat->cmap->N; j++) {
1866         if (tmp2[j] > *norm) *norm = tmp2[j];
1867       }
1868       PetscCall(PetscFree(tmp));
1869       PetscCall(PetscFree(tmp2));
1870       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1871     } else if (type == NORM_INFINITY) { /* max row norm */
1872       PetscReal ntemp = 0.0;
1873       for (j=0; j<aij->A->rmap->n; j++) {
1874         v   = amata + amat->i[j];
1875         sum = 0.0;
1876         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1877           sum += PetscAbsScalar(*v); v++;
1878         }
1879         v = bmata + bmat->i[j];
1880         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v); v++;
1882         }
1883         if (sum > ntemp) ntemp = sum;
1884       }
1885       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
1886       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1887     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1888     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
1889     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1895 {
1896   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1897   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1898   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1899   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1900   Mat             B,A_diag,*B_diag;
1901   const MatScalar *pbv,*bv;
1902 
1903   PetscFunctionBegin;
1904   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1905   ai = Aloc->i; aj = Aloc->j;
1906   bi = Bloc->i; bj = Bloc->j;
1907   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1908     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1909     PetscSFNode          *oloc;
1910     PETSC_UNUSED PetscSF sf;
1911 
1912     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
1913     /* compute d_nnz for preallocation */
1914     PetscCall(PetscArrayzero(d_nnz,na));
1915     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1916     /* compute local off-diagonal contributions */
1917     PetscCall(PetscArrayzero(g_nnz,nb));
1918     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1919     /* map those to global */
1920     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1921     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
1922     PetscCall(PetscSFSetFromOptions(sf));
1923     PetscCall(PetscArrayzero(o_nnz,na));
1924     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1925     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
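         /* o_nnz[c] now counts, summed over all processes, the off-diagonal-block nonzeros of A in
            global column (cstart + c); this is exactly the off-diagonal row length of the
            corresponding row of the transpose, used for the preallocation below. */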
1926     PetscCall(PetscSFDestroy(&sf));
1927 
1928     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1929     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1930     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1931     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1932     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
1933     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1934   } else {
1935     B    = *matout;
1936     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1937   }
1938 
1939   b           = (Mat_MPIAIJ*)B->data;
1940   A_diag      = a->A;
1941   B_diag      = &b->A;
1942   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1943   A_diag_ncol = A_diag->cmap->N;
1944   B_diag_ilen = sub_B_diag->ilen;
1945   B_diag_i    = sub_B_diag->i;
1946 
1947   /* Set ilen for diagonal of B */
1948   for (i=0; i<A_diag_ncol; i++) {
1949     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1950   }
1951 
1952   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1953   very quickly (i.e., without calling MatSetValues()), because all writes are local. */
1954   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1955 
1956   /* copy over the B part */
1957   PetscCall(PetscMalloc1(bi[mb],&cols));
1958   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1959   pbv  = bv;
1960   row  = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
1966     row++;
1967     pbv += ncol; cols_tmp += ncol;
1968   }
1969   PetscCall(PetscFree(cols));
1970   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1971 
1972   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1973   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1974   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1975     *matout = B;
1976   } else {
1977     PetscCall(MatHeaderMerge(A,&B));
1978   }
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1983 {
1984   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1985   Mat            a    = aij->A,b = aij->B;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1990   if (rr) {
1991     PetscCall(VecGetLocalSize(rr,&s1));
1992     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1995   }
1996   if (ll) {
1997     PetscCall(VecGetLocalSize(ll,&s1));
1998     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2000   }
2001   /* scale  the diagonal block */
2002   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2007     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015 
2016   PetscFunctionBegin;
2017   PetscCall(MatSetUnfactored(a->A));
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2022 {
2023   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2024   Mat            a,b,c,d;
2025   PetscBool      flg;
2026 
2027   PetscFunctionBegin;
2028   a = matA->A; b = matA->B;
2029   c = matB->A; d = matB->B;
2030 
2031   PetscCall(MatEqual(a,c,&flg));
2032   if (flg) {
2033     PetscCall(MatEqual(b,d,&flg));
2034   }
2035   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2040 {
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2043 
2044   PetscFunctionBegin;
2045   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2046   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2047     /* Because of the column compression in the off-processor part of the matrix a->B,
2048        the number of columns in a->B and b->B may differ, hence we cannot call
2049        MatCopy() directly on the two parts. If need be, a copy more efficient than
2050        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2051        and then copying the submatrices */
2052     PetscCall(MatCopy_Basic(A,B,str));
2053   } else {
2054     PetscCall(MatCopy(a->A,b->A,str));
2055     PetscCall(MatCopy(a->B,b->B,str));
2056   }
2057   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2062 {
2063   PetscFunctionBegin;
2064   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
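     /* For example, if a row of X has columns {0,3,7} and the same row of Y has columns {3,5},
        the merged count is nnz = 4, corresponding to the union {0,3,5,7}. */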
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscInt       m = Y->rmap->N;
2097   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2098   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2099 
2100   PetscFunctionBegin;
2101   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2106 {
2107   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2108 
2109   PetscFunctionBegin;
2110   if (str == SAME_NONZERO_PATTERN) {
2111     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2112     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2113   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2114     PetscCall(MatAXPY_Basic(Y,a,X,str));
2115   } else {
2116     Mat      B;
2117     PetscInt *nnz_d,*nnz_o;
2118 
2119     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2120     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2121     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2122     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2123     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2124     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2125     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2126     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2127     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2128     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2129     PetscCall(MatHeaderMerge(Y,&B));
2130     PetscCall(PetscFree(nnz_d));
2131     PetscCall(PetscFree(nnz_o));
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137 
2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139 {
2140   PetscFunctionBegin;
2141   if (PetscDefined(USE_COMPLEX)) {
2142     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2143 
2144     PetscCall(MatConjugate_SeqAIJ(aij->A));
2145     PetscCall(MatConjugate_SeqAIJ(aij->B));
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatRealPart(a->A));
2156   PetscCall(MatRealPart(a->B));
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161 {
2162   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(MatImaginaryPart(a->A));
2166   PetscCall(MatImaginaryPart(a->B));
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2173   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2174   PetscScalar       *va,*vv;
2175   Vec               vB,vA;
2176   const PetscScalar *vb;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2180   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2181 
2182   PetscCall(VecGetArrayWrite(vA,&va));
2183   if (idx) {
2184     for (i=0; i<m; i++) {
2185       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186     }
2187   }
2188 
2189   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2190   PetscCall(PetscMalloc1(m,&idxb));
2191   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2192 
2193   PetscCall(VecGetArrayWrite(v,&vv));
2194   PetscCall(VecGetArrayRead(vB,&vb));
2195   for (i=0; i<m; i++) {
2196     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197       vv[i] = vb[i];
2198       if (idx) idx[i] = a->garray[idxb[i]];
2199     } else {
2200       vv[i] = va[i];
2201       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2202         idx[i] = a->garray[idxb[i]];
2203     }
2204   }
2205   PetscCall(VecRestoreArrayWrite(v,&vv));
2206   PetscCall(VecRestoreArrayWrite(vA,&va));
2207   PetscCall(VecRestoreArrayRead(vB,&vb));
2208   PetscCall(PetscFree(idxb));
2209   PetscCall(VecDestroy(&vA));
2210   PetscCall(VecDestroy(&vB));
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2215 {
2216   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2217   PetscInt          m = A->rmap->n,n = A->cmap->n;
2218   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2219   PetscInt          *cmap  = mat->garray;
2220   PetscInt          *diagIdx, *offdiagIdx;
2221   Vec               diagV, offdiagV;
2222   PetscScalar       *a, *diagA, *offdiagA;
2223   const PetscScalar *ba,*bav;
2224   PetscInt          r,j,col,ncols,*bi,*bj;
2225   Mat               B = mat->B;
2226   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2227 
2228   PetscFunctionBegin;
2229   /* When a process holds entire A and other processes have no entry */
2230   if (A->cmap->N == n) {
2231     PetscCall(VecGetArrayWrite(v,&diagA));
2232     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2233     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
2234     PetscCall(VecDestroy(&diagV));
2235     PetscCall(VecRestoreArrayWrite(v,&diagA));
2236     PetscFunctionReturn(0);
2237   } else if (n == 0) {
2238     if (m) {
2239       PetscCall(VecGetArrayWrite(v,&a));
2240       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2241       PetscCall(VecRestoreArrayWrite(v,&a));
2242     }
2243     PetscFunctionReturn(0);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
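       /* B stores only explicitly set off-diagonal entries, with compressed column indices mapped
          to global columns through cmap[]; any global column outside the diagonal block that is
          missing from cmap[] is an implicit 0.0, whose index must be found in case 0.0 is the
          smallest magnitude in the row. */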
2252   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2253   ba   = bav;
2254   bi   = b->i;
2255   bj   = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r+1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2261     } else { /* Brow is sparse, so the smallest magnitude in the off-diagonal part is already known to be 0.0 (an implicit zero) */
2262       offdiagA[r] = 0.0;
2263 
2264       /* Find first hole in the cmap */
2265       for (j=0; j<ncols; j++) {
2266         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2267         if (col > j && j < cstart) {
2268           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2269           break;
2270         } else if (col > j + n && j >= cstart) {
2271           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2272           break;
2273         }
2274       }
2275       if (j == ncols && ncols < A->cmap->N - n) {
2276         /* a hole is outside compressed Bcols */
2277         if (ncols == 0) {
2278           if (cstart) {
2279             offdiagIdx[r] = 0;
2280           } else offdiagIdx[r] = cend;
2281         } else { /* ncols > 0 */
2282           offdiagIdx[r] = cmap[ncols-1] + 1;
2283           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2284         }
2285       }
2286     }
2287 
2288     for (j=0; j<ncols; j++) {
2289       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2290       ba++; bj++;
2291     }
2292   }
2293 
2294   PetscCall(VecGetArrayWrite(v, &a));
2295   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2296   for (r = 0; r < m; ++r) {
2297     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2298       a[r]   = diagA[r];
2299       if (idx) idx[r] = cstart + diagIdx[r];
2300     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2301       a[r] = diagA[r];
2302       if (idx) {
2303         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2304           idx[r] = cstart + diagIdx[r];
2305         } else idx[r] = offdiagIdx[r];
2306       }
2307     } else {
2308       a[r]   = offdiagA[r];
2309       if (idx) idx[r] = offdiagIdx[r];
2310     }
2311   }
2312   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2313   PetscCall(VecRestoreArrayWrite(v, &a));
2314   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2315   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2316   PetscCall(VecDestroy(&diagV));
2317   PetscCall(VecDestroy(&offdiagV));
2318   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2323 {
2324   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2325   PetscInt          m = A->rmap->n,n = A->cmap->n;
2326   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2327   PetscInt          *cmap  = mat->garray;
2328   PetscInt          *diagIdx, *offdiagIdx;
2329   Vec               diagV, offdiagV;
2330   PetscScalar       *a, *diagA, *offdiagA;
2331   const PetscScalar *ba,*bav;
2332   PetscInt          r,j,col,ncols,*bi,*bj;
2333   Mat               B = mat->B;
2334   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2335 
2336   PetscFunctionBegin;
2337   /* When a process holds entire A and other processes have no entry */
2338   if (A->cmap->N == n) {
2339     PetscCall(VecGetArrayWrite(v,&diagA));
2340     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2341     PetscCall(MatGetRowMin(mat->A,diagV,idx));
2342     PetscCall(VecDestroy(&diagV));
2343     PetscCall(VecRestoreArrayWrite(v,&diagA));
2344     PetscFunctionReturn(0);
2345   } else if (n == 0) {
2346     if (m) {
2347       PetscCall(VecGetArrayWrite(v,&a));
2348       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2349       PetscCall(VecRestoreArrayWrite(v,&a));
2350     }
2351     PetscFunctionReturn(0);
2352   }
2353 
2354   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
2355   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2356   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2357   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2358 
2359   /* Get offdiagIdx[] for implicit 0.0 */
2360   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2361   ba   = bav;
2362   bi   = b->i;
2363   bj   = b->j;
2364   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2365   for (r = 0; r < m; r++) {
2366     ncols = bi[r+1] - bi[r];
2367     if (ncols == A->cmap->N - n) { /* Brow is dense */
2368       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2369     } else { /* Brow is sparse, so the minimum over the off-diagonal part is already known to be at most 0.0 (an implicit zero) */
2370       offdiagA[r] = 0.0;
2371 
2372       /* Find first hole in the cmap */
2373       for (j=0; j<ncols; j++) {
2374         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2375         if (col > j && j < cstart) {
2376           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2377           break;
2378         } else if (col > j + n && j >= cstart) {
2379           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2380           break;
2381         }
2382       }
2383       if (j == ncols && ncols < A->cmap->N - n) {
2384         /* a hole is outside compressed Bcols */
2385         if (ncols == 0) {
2386           if (cstart) {
2387             offdiagIdx[r] = 0;
2388           } else offdiagIdx[r] = cend;
2389         } else { /* ncols > 0 */
2390           offdiagIdx[r] = cmap[ncols-1] + 1;
2391           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2392         }
2393       }
2394     }
2395 
2396     for (j=0; j<ncols; j++) {
2397       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2398       ba++; bj++;
2399     }
2400   }
2401 
2402   PetscCall(VecGetArrayWrite(v, &a));
2403   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2404   for (r = 0; r < m; ++r) {
2405     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2406       a[r]   = diagA[r];
2407       if (idx) idx[r] = cstart + diagIdx[r];
2408     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2409       a[r] = diagA[r];
2410       if (idx) {
2411         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2412           idx[r] = cstart + diagIdx[r];
2413         } else idx[r] = offdiagIdx[r];
2414       }
2415     } else {
2416       a[r]   = offdiagA[r];
2417       if (idx) idx[r] = offdiagIdx[r];
2418     }
2419   }
2420   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2421   PetscCall(VecRestoreArrayWrite(v, &a));
2422   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2423   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2424   PetscCall(VecDestroy(&diagV));
2425   PetscCall(VecDestroy(&offdiagV));
2426   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2433   PetscInt          m = A->rmap->n,n = A->cmap->n;
2434   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2435   PetscInt          *cmap  = mat->garray;
2436   PetscInt          *diagIdx, *offdiagIdx;
2437   Vec               diagV, offdiagV;
2438   PetscScalar       *a, *diagA, *offdiagA;
2439   const PetscScalar *ba,*bav;
2440   PetscInt          r,j,col,ncols,*bi,*bj;
2441   Mat               B = mat->B;
2442   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2443 
2444   PetscFunctionBegin;
2445   /* When a process holds entire A and other processes have no entry */
2446   if (A->cmap->N == n) {
2447     PetscCall(VecGetArrayWrite(v,&diagA));
2448     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2449     PetscCall(MatGetRowMax(mat->A,diagV,idx));
2450     PetscCall(VecDestroy(&diagV));
2451     PetscCall(VecRestoreArrayWrite(v,&diagA));
2452     PetscFunctionReturn(0);
2453   } else if (n == 0) {
2454     if (m) {
2455       PetscCall(VecGetArrayWrite(v,&a));
2456       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2457       PetscCall(VecRestoreArrayWrite(v,&a));
2458     }
2459     PetscFunctionReturn(0);
2460   }
2461 
2462   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2463   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2464   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2465   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2466 
2467   /* Get offdiagIdx[] for implicit 0.0 */
2468   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2469   ba   = bav;
2470   bi   = b->i;
2471   bj   = b->j;
2472   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2473   for (r = 0; r < m; r++) {
2474     ncols = bi[r+1] - bi[r];
2475     if (ncols == A->cmap->N - n) { /* Brow is dense */
2476       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2477     } else { /* Brow is sparse, so the maximum over the off-diagonal part is already known to be at least 0.0 (an implicit zero) */
2478       offdiagA[r] = 0.0;
2479 
2480       /* Find first hole in the cmap */
2481       for (j=0; j<ncols; j++) {
2482         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2483         if (col > j && j < cstart) {
2484           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2485           break;
2486         } else if (col > j + n && j >= cstart) {
2487           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2488           break;
2489         }
2490       }
2491       if (j == ncols && ncols < A->cmap->N - n) {
2492         /* a hole is outside compressed Bcols */
2493         if (ncols == 0) {
2494           if (cstart) {
2495             offdiagIdx[r] = 0;
2496           } else offdiagIdx[r] = cend;
2497         } else { /* ncols > 0 */
2498           offdiagIdx[r] = cmap[ncols-1] + 1;
2499           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2500         }
2501       }
2502     }
2503 
2504     for (j=0; j<ncols; j++) {
2505       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2506       ba++; bj++;
2507     }
2508   }
2509 
2510   PetscCall(VecGetArrayWrite(v,    &a));
2511   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
2512   for (r = 0; r < m; ++r) {
2513     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2514       a[r] = diagA[r];
2515       if (idx) idx[r] = cstart + diagIdx[r];
2516     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2517       a[r] = diagA[r];
2518       if (idx) {
2519         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2520           idx[r] = cstart + diagIdx[r];
2521         } else idx[r] = offdiagIdx[r];
2522       }
2523     } else {
2524       a[r] = offdiagA[r];
2525       if (idx) idx[r] = offdiagIdx[r];
2526     }
2527   }
2528   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2529   PetscCall(VecRestoreArrayWrite(v,       &a));
2530   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
2531   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
2532   PetscCall(VecDestroy(&diagV));
2533   PetscCall(VecDestroy(&offdiagV));
2534   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2539 {
2540   Mat            *dummy;
2541 
2542   PetscFunctionBegin;
2543   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2544   *newmat = *dummy;
2545   PetscCall(PetscFree(dummy));
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2550 {
2551   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2552 
2553   PetscFunctionBegin;
2554   PetscCall(MatInvertBlockDiagonal(a->A,values));
2555   A->factorerrortype = a->A->factorerrortype;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2560 {
2561   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2562 
2563   PetscFunctionBegin;
2564   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2565   PetscCall(MatSetRandom(aij->A,rctx));
2566   if (x->assembled) {
2567     PetscCall(MatSetRandom(aij->B,rctx));
2568   } else {
2569     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2570   }
2571   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2572   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2577 {
2578   PetscFunctionBegin;
2579   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2580   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*@
2585    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2586 
2587    Collective on Mat
2588 
2589    Input Parameters:
2590 +    A - the matrix
2591 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2592 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default it is not used)
2593  Level: advanced
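        Options Database Keys:
     . -mat_increase_overlap_scalable - use the scalable algorithm when computing the overlap

        Example Usage:
        A minimal illustrative sketch; the matrix A, the number of index sets n, the index sets is[],
        and the overlap size ov are placeholders provided by the caller:
     .vb
        PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE));
        PetscCall(MatIncreaseOverlap(A,n,is,ov));
     .ve
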
2594    Level: advanced
2595 @*/
2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2597 {
2598   PetscFunctionBegin;
2599   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2604 {
2605   PetscBool            sc = PETSC_FALSE,flg;
2606 
2607   PetscFunctionBegin;
2608   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2609   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2610   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2611   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2612   PetscOptionsHeadEnd();
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2617 {
2618   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2619   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2620 
2621   PetscFunctionBegin;
2622   if (!Y->preallocated) {
2623     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2624   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2625     PetscInt nonew = aij->nonew;
2626     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2627     aij->nonew = nonew;
2628   }
2629   PetscCall(MatShift_Basic(Y,a));
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2634 {
2635   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2636 
2637   PetscFunctionBegin;
2638   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2639   PetscCall(MatMissingDiagonal(a->A,missing,d));
2640   if (d) {
2641     PetscInt rstart;
2642     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2643     *d += rstart;
2644 
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2650 {
2651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2652 
2653   PetscFunctionBegin;
2654   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 /* -------------------------------------------------------------------*/
2659 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2660                                        MatGetRow_MPIAIJ,
2661                                        MatRestoreRow_MPIAIJ,
2662                                        MatMult_MPIAIJ,
2663                                 /* 4*/ MatMultAdd_MPIAIJ,
2664                                        MatMultTranspose_MPIAIJ,
2665                                        MatMultTransposeAdd_MPIAIJ,
2666                                        NULL,
2667                                        NULL,
2668                                        NULL,
2669                                 /*10*/ NULL,
2670                                        NULL,
2671                                        NULL,
2672                                        MatSOR_MPIAIJ,
2673                                        MatTranspose_MPIAIJ,
2674                                 /*15*/ MatGetInfo_MPIAIJ,
2675                                        MatEqual_MPIAIJ,
2676                                        MatGetDiagonal_MPIAIJ,
2677                                        MatDiagonalScale_MPIAIJ,
2678                                        MatNorm_MPIAIJ,
2679                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2680                                        MatAssemblyEnd_MPIAIJ,
2681                                        MatSetOption_MPIAIJ,
2682                                        MatZeroEntries_MPIAIJ,
2683                                 /*24*/ MatZeroRows_MPIAIJ,
2684                                        NULL,
2685                                        NULL,
2686                                        NULL,
2687                                        NULL,
2688                                 /*29*/ MatSetUp_MPIAIJ,
2689                                        NULL,
2690                                        NULL,
2691                                        MatGetDiagonalBlock_MPIAIJ,
2692                                        NULL,
2693                                 /*34*/ MatDuplicate_MPIAIJ,
2694                                        NULL,
2695                                        NULL,
2696                                        NULL,
2697                                        NULL,
2698                                 /*39*/ MatAXPY_MPIAIJ,
2699                                        MatCreateSubMatrices_MPIAIJ,
2700                                        MatIncreaseOverlap_MPIAIJ,
2701                                        MatGetValues_MPIAIJ,
2702                                        MatCopy_MPIAIJ,
2703                                 /*44*/ MatGetRowMax_MPIAIJ,
2704                                        MatScale_MPIAIJ,
2705                                        MatShift_MPIAIJ,
2706                                        MatDiagonalSet_MPIAIJ,
2707                                        MatZeroRowsColumns_MPIAIJ,
2708                                 /*49*/ MatSetRandom_MPIAIJ,
2709                                        MatGetRowIJ_MPIAIJ,
2710                                        MatRestoreRowIJ_MPIAIJ,
2711                                        NULL,
2712                                        NULL,
2713                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2714                                        NULL,
2715                                        MatSetUnfactored_MPIAIJ,
2716                                        MatPermute_MPIAIJ,
2717                                        NULL,
2718                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2719                                        MatDestroy_MPIAIJ,
2720                                        MatView_MPIAIJ,
2721                                        NULL,
2722                                        NULL,
2723                                 /*64*/ NULL,
2724                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2729                                        MatGetRowMinAbs_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                 /*75*/ MatFDColoringApply_AIJ,
2735                                        MatSetFromOptions_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                        MatFindZeroDiagonals_MPIAIJ,
2739                                 /*80*/ NULL,
2740                                        NULL,
2741                                        NULL,
2742                                 /*83*/ MatLoad_MPIAIJ,
2743                                        MatIsSymmetric_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        NULL,
2748                                 /*89*/ NULL,
2749                                        NULL,
2750                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2751                                        NULL,
2752                                        NULL,
2753                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        MatBindToCPU_MPIAIJ,
2758                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        MatConjugate_MPIAIJ,
2762                                        NULL,
2763                                 /*104*/MatSetValuesRow_MPIAIJ,
2764                                        MatRealPart_MPIAIJ,
2765                                        MatImaginaryPart_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                 /*109*/NULL,
2769                                        NULL,
2770                                        MatGetRowMin_MPIAIJ,
2771                                        NULL,
2772                                        MatMissingDiagonal_MPIAIJ,
2773                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2774                                        NULL,
2775                                        MatGetGhosts_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        MatGetMultiProcBlock_MPIAIJ,
2783                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2784                                        MatGetColumnReductions_MPIAIJ,
2785                                        MatInvertBlockDiagonal_MPIAIJ,
2786                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2787                                        MatCreateSubMatricesMPI_MPIAIJ,
2788                                 /*129*/NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2792                                        NULL,
2793                                 /*134*/NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                 /*139*/MatSetBlockSizes_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        MatFDColoringSetUp_MPIXAIJ,
2802                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2803                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2804                                 /*145*/NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        MatCreateGraph_Simple_AIJ,
2808                                        MatFilter_AIJ
2809 };
2810 
2811 /* ----------------------------------------------------------------------------------------*/
2812 
2813 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2814 {
2815   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2816 
2817   PetscFunctionBegin;
2818   PetscCall(MatStoreValues(aij->A));
2819   PetscCall(MatStoreValues(aij->B));
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2824 {
2825   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2826 
2827   PetscFunctionBegin;
2828   PetscCall(MatRetrieveValues(aij->A));
2829   PetscCall(MatRetrieveValues(aij->B));
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2834 {
2835   Mat_MPIAIJ     *b;
2836   PetscMPIInt    size;
2837 
2838   PetscFunctionBegin;
2839   PetscCall(PetscLayoutSetUp(B->rmap));
2840   PetscCall(PetscLayoutSetUp(B->cmap));
2841   b = (Mat_MPIAIJ*)B->data;
2842 
2843 #if defined(PETSC_USE_CTABLE)
2844   PetscCall(PetscTableDestroy(&b->colmap));
2845 #else
2846   PetscCall(PetscFree(b->colmap));
2847 #endif
2848   PetscCall(PetscFree(b->garray));
2849   PetscCall(VecDestroy(&b->lvec));
2850   PetscCall(VecScatterDestroy(&b->Mvctx));
2851 
2852   /* Because B may have been resized, we simply destroy it and create a new one each time */
2853   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2854   PetscCall(MatDestroy(&b->B));
2855   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2856   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2857   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
2858   PetscCall(MatSetType(b->B,MATSEQAIJ));
2859   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2860 
2861   if (!B->preallocated) {
2862     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2863     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2864     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
2865     PetscCall(MatSetType(b->A,MATSEQAIJ));
2866     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2867   }
2868 
2869   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
2870   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2871   B->preallocated  = PETSC_TRUE;
2872   B->was_assembled = PETSC_FALSE;
2873   B->assembled     = PETSC_FALSE;
2874   PetscFunctionReturn(0);
2875 }
2876 
2877 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2878 {
2879   Mat_MPIAIJ     *b;
2880 
2881   PetscFunctionBegin;
2882   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2883   PetscCall(PetscLayoutSetUp(B->rmap));
2884   PetscCall(PetscLayoutSetUp(B->cmap));
2885   b = (Mat_MPIAIJ*)B->data;
2886 
2887 #if defined(PETSC_USE_CTABLE)
2888   PetscCall(PetscTableDestroy(&b->colmap));
2889 #else
2890   PetscCall(PetscFree(b->colmap));
2891 #endif
2892   PetscCall(PetscFree(b->garray));
2893   PetscCall(VecDestroy(&b->lvec));
2894   PetscCall(VecScatterDestroy(&b->Mvctx));
2895 
2896   PetscCall(MatResetPreallocation(b->A));
2897   PetscCall(MatResetPreallocation(b->B));
2898   B->preallocated  = PETSC_TRUE;
2899   B->was_assembled = PETSC_FALSE;
2900   B->assembled = PETSC_FALSE;
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2905 {
2906   Mat            mat;
2907   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2908 
2909   PetscFunctionBegin;
2910   *newmat = NULL;
2911   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2912   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2913   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
2914   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2915   a       = (Mat_MPIAIJ*)mat->data;
2916 
2917   mat->factortype   = matin->factortype;
2918   mat->assembled    = matin->assembled;
2919   mat->insertmode   = NOT_SET_VALUES;
2920   mat->preallocated = matin->preallocated;
2921 
2922   a->size         = oldmat->size;
2923   a->rank         = oldmat->rank;
2924   a->donotstash   = oldmat->donotstash;
2925   a->roworiented  = oldmat->roworiented;
2926   a->rowindices   = NULL;
2927   a->rowvalues    = NULL;
2928   a->getrowactive = PETSC_FALSE;
2929 
2930   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2931   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2932 
2933   if (oldmat->colmap) {
2934 #if defined(PETSC_USE_CTABLE)
2935     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2936 #else
2937     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
2938     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
2939     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2940 #endif
2941   } else a->colmap = NULL;
2942   if (oldmat->garray) {
2943     PetscInt len;
2944     len  = oldmat->B->cmap->n;
2945     PetscCall(PetscMalloc1(len+1,&a->garray));
2946     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2947     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2948   } else a->garray = NULL;
2949 
2950   /* It may happen MatDuplicate is called with a non-assembled matrix
2951      In fact, MatDuplicate only requires the matrix to be preallocated
2952      This may happen inside a DMCreateMatrix_Shell */
2953   if (oldmat->lvec) {
2954     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2955     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2956   }
2957   if (oldmat->Mvctx) {
2958     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2959     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2960   }
2961   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2962   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2963   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2964   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2965   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2966   *newmat = mat;
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2971 {
2972   PetscBool      isbinary, ishdf5;
2973 
2974   PetscFunctionBegin;
2975   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2976   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2977   /* force binary viewer to load .info file if it has not yet done so */
2978   PetscCall(PetscViewerSetUp(viewer));
2979   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2980   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2981   if (isbinary) {
2982     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2983   } else if (ishdf5) {
2984 #if defined(PETSC_HAVE_HDF5)
2985     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2986 #else
2987     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2988 #endif
2989   } else {
2990     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2991   }
2992   PetscFunctionReturn(0);
2993 }
2994 
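/*
   A brief sketch of the binary layout consumed by MatLoad_MPIAIJ_Binary(), inferred from the
   reads performed below: a 4-entry header (MAT_FILE_CLASSID, number of rows M, number of
   columns N, total number of nonzeros nz), followed by the per-row nonzero counts, the
   column indices, and finally the numerical values.
*/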
2995 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2996 {
2997   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2998   PetscInt       *rowidxs,*colidxs;
2999   PetscScalar    *matvals;
3000 
3001   PetscFunctionBegin;
3002   PetscCall(PetscViewerSetUp(viewer));
3003 
3004   /* read in matrix header */
3005   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3006   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3007   M  = header[1]; N = header[2]; nz = header[3];
3008   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3009   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3010   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3011 
3012   /* set block sizes from the viewer's .info file */
3013   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3014   /* set global sizes if not set already */
3015   if (mat->rmap->N < 0) mat->rmap->N = M;
3016   if (mat->cmap->N < 0) mat->cmap->N = N;
3017   PetscCall(PetscLayoutSetUp(mat->rmap));
3018   PetscCall(PetscLayoutSetUp(mat->cmap));
3019 
3020   /* check if the matrix sizes are correct */
3021   PetscCall(MatGetSize(mat,&rows,&cols));
3022   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3023 
3024   /* read in row lengths and build row indices */
3025   PetscCall(MatGetLocalSize(mat,&m,NULL));
3026   PetscCall(PetscMalloc1(m+1,&rowidxs));
3027   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3028   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
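  /* rowidxs[] now holds CSR row offsets: the prefix sums of the per-row counts read above */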
3029   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3030   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3031   /* read in column indices and matrix values */
3032   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3033   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3034   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3035   /* store matrix indices and values */
3036   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3037   PetscCall(PetscFree(rowidxs));
3038   PetscCall(PetscFree2(colidxs,matvals));
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 /* Not scalable because of ISAllGather() unless getting all columns. */
3043 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3044 {
3045   IS             iscol_local;
3046   PetscBool      isstride;
3047   PetscMPIInt    lisstride=0,gisstride;
3048 
3049   PetscFunctionBegin;
3050   /* check if we are grabbing all columns */
3051   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3052 
3053   if (isstride) {
3054     PetscInt  start,len,mstart,mlen;
3055     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3056     PetscCall(ISGetLocalSize(iscol,&len));
3057     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3058     if (mstart == start && mlen-mstart == len) lisstride = 1;
3059   }
3060 
3061   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3062   if (gisstride) {
3063     PetscInt N;
3064     PetscCall(MatGetSize(mat,NULL,&N));
3065     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3066     PetscCall(ISSetIdentity(iscol_local));
3067     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3068   } else {
3069     PetscInt cbs;
3070     PetscCall(ISGetBlockSize(iscol,&cbs));
3071     PetscCall(ISAllGather(iscol,&iscol_local));
3072     PetscCall(ISSetBlockSize(iscol_local,cbs));
3073   }
3074 
3075   *isseq = iscol_local;
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 /*
3080  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3081  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3082 
3083  Input Parameters:
3084    mat - matrix
3085    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3086            i.e., mat->rstart <= isrow[i] < mat->rend
3087    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3088            i.e., mat->cstart <= iscol[i] < mat->cend
3089  Output Parameters:
3090    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3091    iscol_o - sequential column index set for retrieving mat->B
3092    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
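
   Illustration (not from the source): if a ghost column of mat->B corresponds to a global column
   that appears as the k-th entry (0-based) of the concatenated iscol across all processes, then
   that column's local position in mat->B is appended to iscol_o and the matching garray entry is k.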
3093  */
3094 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3095 {
3096   Vec            x,cmap;
3097   const PetscInt *is_idx;
3098   PetscScalar    *xarray,*cmaparray;
3099   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3100   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3101   Mat            B=a->B;
3102   Vec            lvec=a->lvec,lcmap;
3103   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3104   MPI_Comm       comm;
3105   VecScatter     Mvctx=a->Mvctx;
3106 
3107   PetscFunctionBegin;
3108   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3109   PetscCall(ISGetLocalSize(iscol,&ncols));
3110 
3111   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3112   PetscCall(MatCreateVecs(mat,&x,NULL));
3113   PetscCall(VecSet(x,-1.0));
3114   PetscCall(VecDuplicate(x,&cmap));
3115   PetscCall(VecSet(cmap,-1.0));
3116 
3117   /* Get start indices */
3118   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3119   isstart -= ncols;
3120   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3121 
3122   PetscCall(ISGetIndices(iscol,&is_idx));
3123   PetscCall(VecGetArray(x,&xarray));
3124   PetscCall(VecGetArray(cmap,&cmaparray));
3125   PetscCall(PetscMalloc1(ncols,&idx));
3126   for (i=0; i<ncols; i++) {
3127     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3128     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3129     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3130   }
3131   PetscCall(VecRestoreArray(x,&xarray));
3132   PetscCall(VecRestoreArray(cmap,&cmaparray));
3133   PetscCall(ISRestoreIndices(iscol,&is_idx));
3134 
3135   /* Get iscol_d */
3136   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
3137   PetscCall(ISGetBlockSize(iscol,&i));
3138   PetscCall(ISSetBlockSize(*iscol_d,i));
3139 
3140   /* Get isrow_d */
3141   PetscCall(ISGetLocalSize(isrow,&m));
3142   rstart = mat->rmap->rstart;
3143   PetscCall(PetscMalloc1(m,&idx));
3144   PetscCall(ISGetIndices(isrow,&is_idx));
3145   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3146   PetscCall(ISRestoreIndices(isrow,&is_idx));
3147 
3148   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
3149   PetscCall(ISGetBlockSize(isrow,&i));
3150   PetscCall(ISSetBlockSize(*isrow_d,i));
3151 
3152   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3153   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3154   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3155 
3156   PetscCall(VecDuplicate(lvec,&lcmap));
3157 
3158   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3159   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3160 
3161   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3162   /* off-process column indices */
3163   count = 0;
3164   PetscCall(PetscMalloc1(Bn,&idx));
3165   PetscCall(PetscMalloc1(Bn,&cmap1));
3166 
3167   PetscCall(VecGetArray(lvec,&xarray));
3168   PetscCall(VecGetArray(lcmap,&cmaparray));
3169   for (i=0; i<Bn; i++) {
3170     if (PetscRealPart(xarray[i]) > -1.0) {
3171       idx[count]     = i;                   /* local column index in off-diagonal part B */
3172       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3173       count++;
3174     }
3175   }
3176   PetscCall(VecRestoreArray(lvec,&xarray));
3177   PetscCall(VecRestoreArray(lcmap,&cmaparray));
3178 
3179   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3180   /* cannot ensure iscol_o has same blocksize as iscol! */
3181 
3182   PetscCall(PetscFree(idx));
3183   *garray = cmap1;
3184 
3185   PetscCall(VecDestroy(&x));
3186   PetscCall(VecDestroy(&cmap));
3187   PetscCall(VecDestroy(&lcmap));
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3192 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3193 {
3194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3195   Mat            M = NULL;
3196   MPI_Comm       comm;
3197   IS             iscol_d,isrow_d,iscol_o;
3198   Mat            Asub = NULL,Bsub = NULL;
3199   PetscInt       n;
3200 
3201   PetscFunctionBegin;
3202   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3203 
3204   if (call == MAT_REUSE_MATRIX) {
3205     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3206     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
3207     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3208 
3209     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
3210     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3211 
3212     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
3213     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3214 
3215     /* Update diagonal and off-diagonal portions of submat */
3216     asub = (Mat_MPIAIJ*)(*submat)->data;
3217     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
3218     PetscCall(ISGetLocalSize(iscol_o,&n));
3219     if (n) {
3220       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
3221     }
3222     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
3223     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
3224 
3225   } else { /* call == MAT_INITIAL_MATRIX */
3226     const PetscInt *garray;
3227     PetscInt        BsubN;
3228 
3229     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3230     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
3231 
3232     /* Create local submatrices Asub and Bsub */
3233     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
3234     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
3235 
3236     /* Create submatrix M */
3237     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
3238 
3239     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3240     asub = (Mat_MPIAIJ*)M->data;
3241 
3242     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3243     n = asub->B->cmap->N;
3244     if (BsubN > n) {
3245       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3246       const PetscInt *idx;
3247       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3248       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
3249 
3250       PetscCall(PetscMalloc1(n,&idx_new));
3251       j = 0;
3252       PetscCall(ISGetIndices(iscol_o,&idx));
3253       for (i=0; i<n; i++) {
3254         if (j >= BsubN) break;
3255         while (subgarray[i] > garray[j]) j++;
3256 
3257         if (subgarray[i] == garray[j]) {
3258           idx_new[i] = idx[j++];
3259         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3260       }
3261       PetscCall(ISRestoreIndices(iscol_o,&idx));
3262 
3263       PetscCall(ISDestroy(&iscol_o));
3264       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
3265 
3266     } else if (BsubN < n) {
3267       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3268     }
3269 
3270     PetscCall(PetscFree(garray));
3271     *submat = M;
3272 
3273     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3274     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
3275     PetscCall(ISDestroy(&isrow_d));
3276 
3277     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
3278     PetscCall(ISDestroy(&iscol_d));
3279 
3280     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
3281     PetscCall(ISDestroy(&iscol_o));
3282   }
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3287 {
3288   IS             iscol_local=NULL,isrow_d;
3289   PetscInt       csize;
3290   PetscInt       n,i,j,start,end;
3291   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3292   MPI_Comm       comm;
3293 
3294   PetscFunctionBegin;
3295   /* If isrow has same processor distribution as mat,
3296      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3297   if (call == MAT_REUSE_MATRIX) {
3298     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3299     if (isrow_d) {
3300       sameRowDist  = PETSC_TRUE;
3301       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3302     } else {
3303       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3304       if (iscol_local) {
3305         sameRowDist  = PETSC_TRUE;
3306         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3307       }
3308     }
3309   } else {
3310     /* Check if isrow has same processor distribution as mat */
3311     sameDist[0] = PETSC_FALSE;
3312     PetscCall(ISGetLocalSize(isrow,&n));
3313     if (!n) {
3314       sameDist[0] = PETSC_TRUE;
3315     } else {
3316       PetscCall(ISGetMinMax(isrow,&i,&j));
3317       PetscCall(MatGetOwnershipRange(mat,&start,&end));
3318       if (i >= start && j < end) {
3319         sameDist[0] = PETSC_TRUE;
3320       }
3321     }
3322 
3323     /* Check if iscol has same processor distribution as mat */
3324     sameDist[1] = PETSC_FALSE;
3325     PetscCall(ISGetLocalSize(iscol,&n));
3326     if (!n) {
3327       sameDist[1] = PETSC_TRUE;
3328     } else {
3329       PetscCall(ISGetMinMax(iscol,&i,&j));
3330       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
3331       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3332     }
3333 
3334     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3335     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
3336     sameRowDist = tsameDist[0];
3337   }
3338 
3339   if (sameRowDist) {
3340     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3341       /* isrow and iscol have same processor distribution as mat */
3342       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
3343       PetscFunctionReturn(0);
3344     } else { /* sameRowDist */
3345       /* isrow has same processor distribution as mat */
3346       if (call == MAT_INITIAL_MATRIX) {
3347         PetscBool sorted;
3348         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3349         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
3350         PetscCall(ISGetSize(iscol,&i));
3351         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3352 
3353         PetscCall(ISSorted(iscol_local,&sorted));
3354         if (sorted) {
3355           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3356           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
3357           PetscFunctionReturn(0);
3358         }
3359       } else { /* call == MAT_REUSE_MATRIX */
3360         IS iscol_sub;
3361         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3362         if (iscol_sub) {
3363           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
3364           PetscFunctionReturn(0);
3365         }
3366       }
3367     }
3368   }
3369 
3370   /* General case: iscol -> iscol_local which has global size of iscol */
3371   if (call == MAT_REUSE_MATRIX) {
3372     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
3373     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3374   } else {
3375     if (!iscol_local) {
3376       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3377     }
3378   }
3379 
3380   PetscCall(ISGetLocalSize(iscol,&csize));
3381   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
3382 
3383   if (call == MAT_INITIAL_MATRIX) {
3384     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
3385     PetscCall(ISDestroy(&iscol_local));
3386   }
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 /*@C
3391      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3392          and "off-diagonal" parts of the matrix in CSR format.
3393 
3394    Collective
3395 
3396    Input Parameters:
3397 +  comm - MPI communicator
3398 .  A - "diagonal" portion of matrix
3399 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3400 -  garray - global index of B columns
3401 
3402    Output Parameter:
3403 .   mat - the matrix, with input A as its local diagonal matrix
3404    Level: advanced
3405 
3406    Notes:
3407        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3408        A becomes part of the output mat, while B is destroyed by this routine; the user can no longer use A or B.
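
   Example (a minimal sketch, not taken from the source; assumes Adiag and Boffd are previously created MATSEQAIJ
   matrices with the same number of rows, and garray[] maps the columns of Boffd to global column indices):
$     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boffd,garray,&C));
$     /* Adiag and Boffd now belong to C and must not be used or destroyed by the caller */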
3409 
3410 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3411 @*/
3412 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3413 {
3414   Mat_MPIAIJ        *maij;
3415   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3416   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3417   const PetscScalar *oa;
3418   Mat               Bnew;
3419   PetscInt          m,n,N;
3420   MatType           mpi_mat_type;
3421 
3422   PetscFunctionBegin;
3423   PetscCall(MatCreate(comm,mat));
3424   PetscCall(MatGetSize(A,&m,&n));
3425   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3426   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3427   /* The check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3428   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3429 
3430   /* Get global columns of mat */
3431   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3432 
3433   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
3434   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3435   PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
3436   PetscCall(MatSetType(*mat,mpi_mat_type));
3437 
3438   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3439   maij = (Mat_MPIAIJ*)(*mat)->data;
3440 
3441   (*mat)->preallocated = PETSC_TRUE;
3442 
3443   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3444   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3445 
3446   /* Set A as diagonal portion of *mat */
3447   maij->A = A;
3448 
3449   nz = oi[m];
3450   for (i=0; i<nz; i++) {
3451     col   = oj[i];
3452     oj[i] = garray[col];
3453   }
3454 
3455   /* Set Bnew as off-diagonal portion of *mat */
3456   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
3457   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
3458   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3459   bnew        = (Mat_SeqAIJ*)Bnew->data;
3460   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3461   maij->B     = Bnew;
3462 
3463   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3464 
3465   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3466   b->free_a       = PETSC_FALSE;
3467   b->free_ij      = PETSC_FALSE;
3468   PetscCall(MatDestroy(&B));
3469 
3470   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3471   bnew->free_a       = PETSC_TRUE;
3472   bnew->free_ij      = PETSC_TRUE;
3473 
3474   /* condense columns of maij->B */
3475   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3476   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3477   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3478   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
3479   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3484 
3485 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3486 {
3487   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3488   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3489   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3490   Mat            M,Msub,B=a->B;
3491   MatScalar      *aa;
3492   Mat_SeqAIJ     *aij;
3493   PetscInt       *garray = a->garray,*colsub,Ncols;
3494   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3495   IS             iscol_sub,iscmap;
3496   const PetscInt *is_idx,*cmap;
3497   PetscBool      allcolumns=PETSC_FALSE;
3498   MPI_Comm       comm;
3499 
3500   PetscFunctionBegin;
3501   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3502   if (call == MAT_REUSE_MATRIX) {
3503     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3504     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3505     PetscCall(ISGetLocalSize(iscol_sub,&count));
3506 
3507     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3508     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3509 
3510     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3511     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3512 
3513     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3514 
3515   } else { /* call == MAT_INITIAL_MATRIX */
3516     PetscBool flg;
3517 
3518     PetscCall(ISGetLocalSize(iscol,&n));
3519     PetscCall(ISGetSize(iscol,&Ncols));
3520 
3521     /* (1) iscol -> nonscalable iscol_local */
3522     /* Check for special case: each processor gets entire matrix columns */
3523     PetscCall(ISIdentity(iscol_local,&flg));
3524     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3525     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3526     if (allcolumns) {
3527       iscol_sub = iscol_local;
3528       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3529       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3530 
3531     } else {
3532       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3533       PetscInt *idx,*cmap1,k;
3534       PetscCall(PetscMalloc1(Ncols,&idx));
3535       PetscCall(PetscMalloc1(Ncols,&cmap1));
3536       PetscCall(ISGetIndices(iscol_local,&is_idx));
3537       count = 0;
3538       k     = 0;
3539       for (i=0; i<Ncols; i++) {
3540         j = is_idx[i];
3541         if (j >= cstart && j < cend) {
3542           /* diagonal part of mat */
3543           idx[count]     = j;
3544           cmap1[count++] = i; /* column index in submat */
3545         } else if (Bn) {
3546           /* off-diagonal part of mat */
3547           if (j == garray[k]) {
3548             idx[count]     = j;
3549             cmap1[count++] = i;  /* column index in submat */
3550           } else if (j > garray[k]) {
3551             while (j > garray[k] && k < Bn-1) k++;
3552             if (j == garray[k]) {
3553               idx[count]     = j;
3554               cmap1[count++] = i; /* column index in submat */
3555             }
3556           }
3557         }
3558       }
3559       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3560 
3561       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3562       PetscCall(ISGetBlockSize(iscol,&cbs));
3563       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3564 
3565       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3566     }
3567 
3568     /* (3) Create sequential Msub */
3569     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3570   }
3571 
3572   PetscCall(ISGetLocalSize(iscol_sub,&count));
3573   aij  = (Mat_SeqAIJ*)(Msub)->data;
3574   ii   = aij->i;
3575   PetscCall(ISGetIndices(iscmap,&cmap));
3576 
3577   /*
3578       m - number of local rows
3579       Ncols - number of columns (same on all processors)
3580       rstart - first row in new global matrix generated
3581   */
3582   PetscCall(MatGetSize(Msub,&m,NULL));
3583 
3584   if (call == MAT_INITIAL_MATRIX) {
3585     /* (4) Create parallel newmat */
3586     PetscMPIInt    rank,size;
3587     PetscInt       csize;
3588 
3589     PetscCallMPI(MPI_Comm_size(comm,&size));
3590     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3591 
3592     /*
3593         Determine the number of non-zeros in the diagonal and off-diagonal
3594         portions of the matrix in order to do correct preallocation
3595     */
3596 
3597     /* first get start and end of "diagonal" columns */
3598     PetscCall(ISGetLocalSize(iscol,&csize));
3599     if (csize == PETSC_DECIDE) {
3600       PetscCall(ISGetSize(isrow,&mglobal));
3601       if (mglobal == Ncols) { /* square matrix */
3602         nlocal = m;
3603       } else {
3604         nlocal = Ncols/size + ((Ncols % size) > rank);
3605       }
3606     } else {
3607       nlocal = csize;
3608     }
3609     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3610     rstart = rend - nlocal;
3611     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3612 
3613     /* next, compute all the lengths */
3614     jj    = aij->j;
3615     PetscCall(PetscMalloc1(2*m+1,&dlens));
3616     olens = dlens + m;
3617     for (i=0; i<m; i++) {
3618       jend = ii[i+1] - ii[i];
3619       olen = 0;
3620       dlen = 0;
3621       for (j=0; j<jend; j++) {
3622         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3623         else dlen++;
3624         jj++;
3625       }
3626       olens[i] = olen;
3627       dlens[i] = dlen;
3628     }
3629 
3630     PetscCall(ISGetBlockSize(isrow,&bs));
3631     PetscCall(ISGetBlockSize(iscol,&cbs));
3632 
3633     PetscCall(MatCreate(comm,&M));
3634     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3635     PetscCall(MatSetBlockSizes(M,bs,cbs));
3636     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3637     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3638     PetscCall(PetscFree(dlens));
3639 
3640   } else { /* call == MAT_REUSE_MATRIX */
3641     M    = *newmat;
3642     PetscCall(MatGetLocalSize(M,&i,NULL));
3643     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3644     PetscCall(MatZeroEntries(M));
3645     /*
3646          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3647        rather than the slower MatSetValues().
3648     */
3649     M->was_assembled = PETSC_TRUE;
3650     M->assembled     = PETSC_FALSE;
3651   }
3652 
3653   /* (5) Set values of Msub to *newmat */
3654   PetscCall(PetscMalloc1(count,&colsub));
3655   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3656 
3657   jj   = aij->j;
3658   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3659   for (i=0; i<m; i++) {
3660     row = rstart + i;
3661     nz  = ii[i+1] - ii[i];
3662     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3663     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3664     jj += nz; aa += nz;
3665   }
3666   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3667   PetscCall(ISRestoreIndices(iscmap,&cmap));
3668 
3669   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3670   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3671 
3672   PetscCall(PetscFree(colsub));
3673 
3674   /* save Msub, iscol_sub and iscmap used in processor for next request */
3675   if (call == MAT_INITIAL_MATRIX) {
3676     *newmat = M;
3677     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3678     PetscCall(MatDestroy(&Msub));
3679 
3680     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3681     PetscCall(ISDestroy(&iscol_sub));
3682 
3683     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3684     PetscCall(ISDestroy(&iscmap));
3685 
3686     if (iscol_local) {
3687       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3688       PetscCall(ISDestroy(&iscol_local));
3689     }
3690   }
3691   PetscFunctionReturn(0);
3692 }
3693 
3694 /*
3695     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3696   on each process, and then the end result obtained by concatenating the local matrices.
3697   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3698 
3699   Note: This requires a sequential iscol with all indices.
3700 */
3701 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3702 {
3703   PetscMPIInt    rank,size;
3704   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3705   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3706   Mat            M,Mreuse;
3707   MatScalar      *aa,*vwork;
3708   MPI_Comm       comm;
3709   Mat_SeqAIJ     *aij;
3710   PetscBool      colflag,allcolumns=PETSC_FALSE;
3711 
3712   PetscFunctionBegin;
3713   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3714   PetscCallMPI(MPI_Comm_rank(comm,&rank));
3715   PetscCallMPI(MPI_Comm_size(comm,&size));
3716 
3717   /* Check for special case: each processor gets entire matrix columns */
3718   PetscCall(ISIdentity(iscol,&colflag));
3719   PetscCall(ISGetLocalSize(iscol,&n));
3720   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3721   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3722 
3723   if (call ==  MAT_REUSE_MATRIX) {
3724     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3725     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3726     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3727   } else {
3728     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3729   }
3730 
3731   /*
3732       m - number of local rows
3733       n - number of columns (same on all processors)
3734       rstart - first row in new global matrix generated
3735   */
3736   PetscCall(MatGetSize(Mreuse,&m,&n));
3737   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3738   if (call == MAT_INITIAL_MATRIX) {
3739     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3740     ii  = aij->i;
3741     jj  = aij->j;
3742 
3743     /*
3744         Determine the number of non-zeros in the diagonal and off-diagonal
3745         portions of the matrix in order to do correct preallocation
3746     */
3747 
3748     /* first get start and end of "diagonal" columns */
3749     if (csize == PETSC_DECIDE) {
3750       PetscCall(ISGetSize(isrow,&mglobal));
3751       if (mglobal == n) { /* square matrix */
3752         nlocal = m;
3753       } else {
3754         nlocal = n/size + ((n % size) > rank);
3755       }
3756     } else {
3757       nlocal = csize;
3758     }
3759     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3760     rstart = rend - nlocal;
3761     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3762 
3763     /* next, compute all the lengths */
3764     PetscCall(PetscMalloc1(2*m+1,&dlens));
3765     olens = dlens + m;
3766     for (i=0; i<m; i++) {
3767       jend = ii[i+1] - ii[i];
3768       olen = 0;
3769       dlen = 0;
3770       for (j=0; j<jend; j++) {
3771         if (*jj < rstart || *jj >= rend) olen++;
3772         else dlen++;
3773         jj++;
3774       }
3775       olens[i] = olen;
3776       dlens[i] = dlen;
3777     }
3778     PetscCall(MatCreate(comm,&M));
3779     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3780     PetscCall(MatSetBlockSizes(M,bs,cbs));
3781     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3782     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3783     PetscCall(PetscFree(dlens));
3784   } else {
3785     PetscInt ml,nl;
3786 
3787     M    = *newmat;
3788     PetscCall(MatGetLocalSize(M,&ml,&nl));
3789     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3790     PetscCall(MatZeroEntries(M));
3791     /*
3792          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3793        rather than the slower MatSetValues().
3794     */
3795     M->was_assembled = PETSC_TRUE;
3796     M->assembled     = PETSC_FALSE;
3797   }
3798   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3799   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3800   ii   = aij->i;
3801   jj   = aij->j;
3802 
3803   /* trigger copy to CPU if needed */
3804   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3805   for (i=0; i<m; i++) {
3806     row   = rstart + i;
3807     nz    = ii[i+1] - ii[i];
3808     cwork = jj; jj += nz;
3809     vwork = aa; aa += nz;
3810     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3811   }
3812   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3813 
3814   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3815   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3816   *newmat = M;
3817 
3818   /* save submatrix used in processor for next request */
3819   if (call ==  MAT_INITIAL_MATRIX) {
3820     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3821     PetscCall(MatDestroy(&Mreuse));
3822   }
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3827 {
3828   PetscInt       m,cstart, cend,j,nnz,i,d;
3829   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3830   const PetscInt *JJ;
3831   PetscBool      nooffprocentries;
3832 
3833   PetscFunctionBegin;
3834   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3835 
3836   PetscCall(PetscLayoutSetUp(B->rmap));
3837   PetscCall(PetscLayoutSetUp(B->cmap));
3838   m      = B->rmap->n;
3839   cstart = B->cmap->rstart;
3840   cend   = B->cmap->rend;
3841   rstart = B->rmap->rstart;
3842 
3843   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3844 
3845   if (PetscDefined(USE_DEBUG)) {
3846     for (i=0; i<m; i++) {
3847       nnz = Ii[i+1]- Ii[i];
3848       JJ  = J + Ii[i];
3849       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3850       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3851       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3852     }
3853   }
3854 
3855   for (i=0; i<m; i++) {
3856     nnz     = Ii[i+1]- Ii[i];
3857     JJ      = J + Ii[i];
3858     nnz_max = PetscMax(nnz_max,nnz);
3859     d       = 0;
3860     for (j=0; j<nnz; j++) {
3861       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3862     }
3863     d_nnz[i] = d;
3864     o_nnz[i] = nnz - d;
3865   }
3866   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3867   PetscCall(PetscFree2(d_nnz,o_nnz));
3868 
3869   for (i=0; i<m; i++) {
3870     ii   = i + rstart;
3871     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3872   }
3873   nooffprocentries    = B->nooffprocentries;
3874   B->nooffprocentries = PETSC_TRUE;
3875   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3876   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3877   B->nooffprocentries = nooffprocentries;
3878 
3879   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3880   PetscFunctionReturn(0);
3881 }
3882 
3883 /*@
3884    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3885    (the default parallel PETSc format).
3886 
3887    Collective
3888 
3889    Input Parameters:
3890 +  B - the matrix
3891 .  i - the indices into j for the start of each local row (starts with zero)
3892 .  j - the column indices for each local row (starts with zero)
3893 -  v - optional values in the matrix
3894 
3895    Level: developer
3896 
3897    Notes:
3898        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3899      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3900      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3901 
3902        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3903 
3904        The format used for the sparse matrix input is equivalent to a
3905     row-major ordering, i.e., for the following matrix, the input data expected is
3906     as shown below.
3907 
3908 $        1 0 0
3909 $        2 0 3     P0
3910 $       -------
3911 $        4 5 6     P1
3912 $
3913 $     Process0 [P0]: rows_owned=[0,1]
3914 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3915 $        j =  {0,0,2}  [size = 3]
3916 $        v =  {1,2,3}  [size = 3]
3917 $
3918 $     Process1 [P1]: rows_owned=[2]
3919 $        i =  {0,3}    [size = nrow+1  = 1+1]
3920 $        j =  {0,1,2}  [size = 3]
3921 $        v =  {4,5,6}  [size = 3]
3922 
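   Example usage (a minimal sketch of the two-process case above, as seen from process 0; the
   arrays hold the illustrative values listed above and the communicator is assumed to have two ranks):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};             /* row offsets of the 2 local rows         */
     PetscInt    j[] = {0,0,2};             /* global column indices                   */
     PetscScalar v[] = {1.0,2.0,3.0};       /* one value per entry of j                */

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);     /* 2 local rows of the 3x3 global matrix   */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v); /* preallocates and inserts the values     */
.ve
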
3923 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3924           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3925 @*/
3926 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3927 {
3928   PetscFunctionBegin;
3929   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3930   PetscFunctionReturn(0);
3931 }
3932 
3933 /*@C
3934    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3935    (the default parallel PETSc format).  For good matrix assembly performance
3936    the user should preallocate the matrix storage by setting the parameters
3937    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3938    performance can be increased by more than a factor of 50.
3939 
3940    Collective
3941 
3942    Input Parameters:
3943 +  B - the matrix
3944 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3945            (same value is used for all local rows)
3946 .  d_nnz - array containing the number of nonzeros in the various rows of the
3947            DIAGONAL portion of the local submatrix (possibly different for each row)
3948            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3949            The size of this array is equal to the number of local rows, i.e., 'm'.
3950            For matrices that will be factored, you must leave room for (and set)
3951            the diagonal entry even if it is zero.
3952 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3953            submatrix (same value is used for all local rows).
3954 -  o_nnz - array containing the number of nonzeros in the various rows of the
3955            OFF-DIAGONAL portion of the local submatrix (possibly different for
3956            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3957            structure. The size of this array is equal to the number
3958            of local rows, i.e., 'm'.
3959 
3960    If the *_nnz parameter is given then the *_nz parameter is ignored
3961 
3962    The AIJ format (also called the Yale sparse matrix format or
3963    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3964    storage.  The stored row and column indices begin with zero.
3965    See Users-Manual: ch_mat for details.
3966 
3967    The parallel matrix is partitioned such that the first m0 rows belong to
3968    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3969    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3970 
3971    The DIAGONAL portion of the local submatrix of a processor can be defined
3972    as the submatrix obtained by extracting the part corresponding to
3973    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3974    first row that belongs to the processor, r2 is the last row belonging to
3975    this processor, and c1-c2 is the range of indices of the local part of a
3976    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3977    common case of a square matrix, the row and column ranges are the same and
3978    the DIAGONAL part is also square. The remaining portion of the local
3979    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3980 
3981    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3982 
3983    You can call MatGetInfo() to get information on how effective the preallocation was;
3984    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3985    You can also run with the option -info and look for messages with the string
3986    malloc in them to see if additional memory allocation was needed.
3987 
3988    Example usage:
3989 
3990    Consider the following 8x8 matrix with 34 non-zero values, that is
3991    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3992    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3993    as follows:
3994 
3995 .vb
3996             1  2  0  |  0  3  0  |  0  4
3997     Proc0   0  5  6  |  7  0  0  |  8  0
3998             9  0 10  | 11  0  0  | 12  0
3999     -------------------------------------
4000            13  0 14  | 15 16 17  |  0  0
4001     Proc1   0 18  0  | 19 20 21  |  0  0
4002             0  0  0  | 22 23  0  | 24  0
4003     -------------------------------------
4004     Proc2  25 26 27  |  0  0 28  | 29  0
4005            30  0  0  | 31 32 33  |  0 34
4006 .ve
4007 
4008    This can be represented as a collection of submatrices as:
4009 
4010 .vb
4011       A B C
4012       D E F
4013       G H I
4014 .ve
4015 
4016    Where the submatrices A,B,C are owned by proc0, D,E,F are
4017    owned by proc1, G,H,I are owned by proc2.
4018 
4019    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4020    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4021    The 'M','N' parameters are 8,8, and have the same values on all procs.
4022 
4023    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4024    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4025    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4026    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4027    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
4028    matrix, and [DF] as another SeqAIJ matrix.
4029 
4030    When d_nz, o_nz parameters are specified, d_nz storage elements are
4031    allocated for every row of the local diagonal submatrix, and o_nz
4032    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4033    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4034    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4035    In this case, the values of d_nz,o_nz are:
4036 .vb
4037      proc0 : dnz = 2, o_nz = 2
4038      proc1 : dnz = 3, o_nz = 2
4039      proc2 : dnz = 1, o_nz = 4
4040 .ve
4041    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4042    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4043    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4044    34 values.
4045 
4046    When d_nnz, o_nnz parameters are specified, the storage is specified
4047    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4048    In the above case the values for d_nnz,o_nnz are:
4049 .vb
4050      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4051      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4052      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4053 .ve
4054    Here the space allocated is the sum of all the above values, i.e., 34, and
4055    hence the preallocation is perfect.
4056 
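   For instance, with the 8x8 matrix above, process 0 would preallocate its three local rows as
   follows (a minimal sketch; B is assumed to already have its sizes set and type MATMPIAIJ):

.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
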
4057    Level: intermediate
4058 
4059 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4060           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4061 @*/
4062 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4063 {
4064   PetscFunctionBegin;
4065   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4066   PetscValidType(B,1);
4067   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4068   PetscFunctionReturn(0);
4069 }
4070 
4071 /*@
4072      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4073          CSR format.
4074 
4075    Collective
4076 
4077    Input Parameters:
4078 +  comm - MPI communicator
4079 .  m - number of local rows (Cannot be PETSC_DECIDE)
4080 .  n - This value should be the same as the local size used in creating the
4081        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4082        it calculated if N is given). For square matrices n is almost always m.
4083 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4084 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4085 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4086 .   j - column indices
4087 -   a - matrix values
4088 
4089    Output Parameter:
4090 .   mat - the matrix
4091 
4092    Level: intermediate
4093 
4094    Notes:
4095        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4096      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4097      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4098 
4099        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4100 
4101        The format used for the sparse matrix input is equivalent to a
4102     row-major ordering, i.e., for the following matrix, the input data expected is
4103     as shown below.
4104 
4105        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4106 
4107 $        1 0 0
4108 $        2 0 3     P0
4109 $       -------
4110 $        4 5 6     P1
4111 $
4112 $     Process0 [P0]: rows_owned=[0,1]
4113 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4114 $        j =  {0,0,2}  [size = 3]
4115 $        v =  {1,2,3}  [size = 3]
4116 $
4117 $     Process1 [P1]: rows_owned=[2]
4118 $        i =  {0,3}    [size = nrow+1  = 1+1]
4119 $        j =  {0,1,2}  [size = 3]
4120 $        v =  {4,5,6}  [size = 3]
4121 
4122 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4123           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4124 @*/
4125 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4126 {
4127   PetscFunctionBegin;
4128   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4129   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130   PetscCall(MatCreate(comm,mat));
4131   PetscCall(MatSetSizes(*mat,m,n,M,N));
4132   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4133   PetscCall(MatSetType(*mat,MATMPIAIJ));
4134   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4135   PetscFunctionReturn(0);
4136 }
4137 
4138 /*@
4139      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4140          CSR format. Only the numerical values are updated; the other arrays must be identical.
4141 
4142    Collective
4143 
4144    Input Parameters:
4145 +  mat - the matrix
4146 .  m - number of local rows (Cannot be PETSC_DECIDE)
4147 .  n - This value should be the same as the local size used in creating the
4148        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4149        it calculated if N is given). For square matrices n is almost always m.
4150 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4151 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4152 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4153 .  J - column indices
4154 -  v - matrix values
4155 
4156    Level: intermediate
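   Notes:
     A minimal usage sketch (the names i, j, v1, v2 are illustrative; i and j must be identical to
     the arrays used when the matrix was created, only the numerical values change):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,v1,&A);
     /* ... later, same nonzero pattern, new values ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,v2);
.ve
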
4157 
4158 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4159           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4160 @*/
4161 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4162 {
4163   PetscInt       cstart,nnz,i,j;
4164   PetscInt       *ld;
4165   PetscBool      nooffprocentries;
4166   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4167   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4168   PetscScalar    *ad,*ao;
4169   const PetscInt *Adi = Ad->i;
4170   PetscInt       ldi,Iii,md;
4171 
4172   PetscFunctionBegin;
4173   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4174   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4175   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4176   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4177 
4178   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4179   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4180   cstart = mat->cmap->rstart;
4181   if (!Aij->ld) {
4182     /* count number of entries below block diagonal */
4183     PetscCall(PetscCalloc1(m,&ld));
4184     Aij->ld = ld;
4185     for (i=0; i<m; i++) {
4186       nnz  = Ii[i+1]- Ii[i];
4187       j     = 0;
4188       while (j < nnz && J[j] < cstart) j++;
4189       J    += nnz;
4190       ld[i] = j;
4191     }
4192   } else {
4193     ld = Aij->ld;
4194   }
4195 
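  /* Each local CSR row of the input is ordered as: ld[i] entries with global column < cstart
     (off-diagonal), then the md entries of the diagonal block, then the remaining off-diagonal
     entries; split the values accordingly between the diagonal (ad) and off-diagonal (ao) storage. */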
4196   for (i=0; i<m; i++) {
4197     nnz  = Ii[i+1]- Ii[i];
4198     Iii  = Ii[i];
4199     ldi  = ld[i];
4200     md   = Adi[i+1]-Adi[i];
4201     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4202     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4203     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4204     ad  += md;
4205     ao  += nnz - md;
4206   }
4207   nooffprocentries      = mat->nooffprocentries;
4208   mat->nooffprocentries = PETSC_TRUE;
4209   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4210   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4211   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4212   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4213   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4214   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4215   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4216   mat->nooffprocentries = nooffprocentries;
4217   PetscFunctionReturn(0);
4218 }
4219 
4220 /*@C
4221    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4222    (the default parallel PETSc format).  For good matrix assembly performance
4223    the user should preallocate the matrix storage by setting the parameters
4224    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4225    performance can be increased by more than a factor of 50.
4226 
4227    Collective
4228 
4229    Input Parameters:
4230 +  comm - MPI communicator
4231 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4232            This value should be the same as the local size used in creating the
4233            y vector for the matrix-vector product y = Ax.
4234 .  n - This value should be the same as the local size used in creating the
4235        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4236        it calculated if N is given). For square matrices n is almost always m.
4237 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4238 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4239 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4240            (same value is used for all local rows)
4241 .  d_nnz - array containing the number of nonzeros in the various rows of the
4242            DIAGONAL portion of the local submatrix (possibly different for each row)
4243            or NULL, if d_nz is used to specify the nonzero structure.
4244            The size of this array is equal to the number of local rows, i.e., 'm'.
4245 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4246            submatrix (same value is used for all local rows).
4247 -  o_nnz - array containing the number of nonzeros in the various rows of the
4248            OFF-DIAGONAL portion of the local submatrix (possibly different for
4249            each row) or NULL, if o_nz is used to specify the nonzero
4250            structure. The size of this array is equal to the number
4251            of local rows, i.e., 'm'.
4252 
4253    Output Parameter:
4254 .  A - the matrix
4255 
4256    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4257    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4258    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4259 
4260    Notes:
4261    If the *_nnz parameter is given then the *_nz parameter is ignored
4262 
4263    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4264    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4265    storage requirements for this matrix.
4266 
4267    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4268    processor then it must be used on all processors that share the object for
4269    that argument.
4270 
4271    The user MUST specify either the local or global matrix dimensions
4272    (possibly both).
4273 
4274    The parallel matrix is partitioned across processors such that the
4275    first m0 rows belong to process 0, the next m1 rows belong to
4276    process 1, the next m2 rows belong to process 2, etc., where
4277    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4278    values corresponding to an [m x N] submatrix.
4279 
4280    The columns are logically partitioned with the n0 columns belonging
4281    to 0th partition, the next n1 columns belonging to the next
4282    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4283 
4284    The DIAGONAL portion of the local submatrix on any given processor
4285    is the submatrix spanning the m rows and the n columns
4286    owned by the given processor, i.e., the diagonal block on
4287    process 0 is [m0 x n0], the diagonal block on process 1 is [m1 x n1],
4288    etc. The remaining portion of the local submatrix [m x (N-n)]
4289    constitutes the OFF-DIAGONAL portion. The example below better
4290    illustrates this concept.
4291 
4292    For a square global matrix we define each processor's diagonal portion
4293    to be its local rows and the corresponding columns (a square submatrix);
4294    each processor's off-diagonal portion encompasses the remainder of the
4295    local matrix (a rectangular submatrix).
4296 
4297    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4298 
4299    When calling this routine with a single process communicator, a matrix of
4300    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4301    type of communicator, use the construction mechanism
4302 .vb
4303      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4304 .ve
4305 
4311    By default, this format uses inodes (identical nodes) when possible.
4312    We search for consecutive rows with the same nonzero structure, thereby
4313    reusing matrix information to achieve increased efficiency.
4314 
4315    Options Database Keys:
4316 +  -mat_no_inode  - Do not use inodes
4317 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4318 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4319         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw, or binary cause the vecscatter to be viewed as a matrix.
4320         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4321 
4322    Example usage:
4323 
4324    Consider the following 8x8 matrix with 34 non-zero values, that is
4325    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4326    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4327    as follows
4328 
4329 .vb
4330             1  2  0  |  0  3  0  |  0  4
4331     Proc0   0  5  6  |  7  0  0  |  8  0
4332             9  0 10  | 11  0  0  | 12  0
4333     -------------------------------------
4334            13  0 14  | 15 16 17  |  0  0
4335     Proc1   0 18  0  | 19 20 21  |  0  0
4336             0  0  0  | 22 23  0  | 24  0
4337     -------------------------------------
4338     Proc2  25 26 27  |  0  0 28  | 29  0
4339            30  0  0  | 31 32 33  |  0 34
4340 .ve
4341 
4342    This can be represented as a collection of submatrices as
4343 
4344 .vb
4345       A B C
4346       D E F
4347       G H I
4348 .ve
4349 
4350    Where the submatrices A,B,C are owned by proc0, D,E,F are
4351    owned by proc1, G,H,I are owned by proc2.
4352 
4353    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4354    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4355    The 'M','N' parameters are 8,8, and have the same values on all procs.
4356 
4357    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4358    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4359    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4360    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4361    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
4362    matrix, and [DF] as another SeqAIJ matrix.
4363 
4364    When d_nz, o_nz parameters are specified, d_nz storage elements are
4365    allocated for every row of the local diagonal submatrix, and o_nz
4366    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4367    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4368    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4369    In this case, the values of d_nz,o_nz are
4370 .vb
4371      proc0 : dnz = 2, o_nz = 2
4372      proc1 : dnz = 3, o_nz = 2
4373      proc2 : dnz = 1, o_nz = 4
4374 .ve
4375    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4376    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4377    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4378    34 values.
4379 
4380    When d_nnz, o_nnz parameters are specified, the storage is specified
4381    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4382    In the above case the values for d_nnz,o_nnz are
4383 .vb
4384      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4385      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4386      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4387 .ve
4388    Here the space allocated is the sum of all the above values, i.e., 34, and
4389    hence the preallocation is perfect.
4390 
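   With the 8x8 example above, a minimal calling sketch for process 0 is shown below (the other
   processes pass their own local sizes and nonzero counts; error checking is omitted):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
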
4391    Level: intermediate
4392 
4393 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4394           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4395 @*/
4396 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4397 {
4398   PetscMPIInt    size;
4399 
4400   PetscFunctionBegin;
4401   PetscCall(MatCreate(comm,A));
4402   PetscCall(MatSetSizes(*A,m,n,M,N));
4403   PetscCallMPI(MPI_Comm_size(comm,&size));
4404   if (size > 1) {
4405     PetscCall(MatSetType(*A,MATMPIAIJ));
4406     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4407   } else {
4408     PetscCall(MatSetType(*A,MATSEQAIJ));
4409     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4410   }
4411   PetscFunctionReturn(0);
4412 }
4413 
4414 /*@C
4415   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4416 
4417   Not collective
4418 
4419   Input Parameter:
4420 . A - The MPIAIJ matrix
4421 
4422   Output Parameters:
4423 + Ad - The local diagonal block as a SeqAIJ matrix
4424 . Ao - The local off-diagonal block as a SeqAIJ matrix
4425 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4426 
4427   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4428   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4429   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4430   local column numbers to global column numbers in the original matrix.
4431 
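  A minimal sketch of how colmap recovers the global column indices of Ao (names are illustrative):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       i,j,m,ncols;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     MatGetLocalSize(Ao,&m,NULL);
     for (i=0; i<m; i++) {
       MatGetRow(Ao,i,&ncols,&cols,NULL);
       for (j=0; j<ncols; j++) {
         PetscPrintf(PETSC_COMM_SELF,"local row %" PetscInt_FMT " -> global column %" PetscInt_FMT "\n",i,colmap[cols[j]]);
       }
       MatRestoreRow(Ao,i,&ncols,&cols,NULL);
     }
.ve
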
4432   Level: intermediate
4433 
4434 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4435 @*/
4436 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4437 {
4438   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4439   PetscBool      flg;
4440 
4441   PetscFunctionBegin;
4442   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4443   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4444   if (Ad)     *Ad     = a->A;
4445   if (Ao)     *Ao     = a->B;
4446   if (colmap) *colmap = a->garray;
4447   PetscFunctionReturn(0);
4448 }
4449 
4450 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
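/* Concatenates, by rows, the sequential matrix inmat contributed by each rank into the parallel
   matrix *outmat (n is the local column size to use, or PETSC_DECIDE); the symbolic setup is done
   only when scall is MAT_INITIAL_MATRIX, otherwise the existing *outmat is refilled. */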
4451 {
4452   PetscInt       m,N,i,rstart,nnz,Ii;
4453   PetscInt       *indx;
4454   PetscScalar    *values;
4455   MatType        rootType;
4456 
4457   PetscFunctionBegin;
4458   PetscCall(MatGetSize(inmat,&m,&N));
4459   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4460     PetscInt       *dnz,*onz,sum,bs,cbs;
4461 
4462     if (n == PETSC_DECIDE) {
4463       PetscCall(PetscSplitOwnership(comm,&n,&N));
4464     }
4465     /* Check sum(n) = N */
4466     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4467     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4468 
4469     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4470     rstart -= m;
4471 
4472     MatPreallocateBegin(comm,m,n,dnz,onz);
4473     for (i=0; i<m; i++) {
4474       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4475       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4476       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4477     }
4478 
4479     PetscCall(MatCreate(comm,outmat));
4480     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4481     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4482     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4483     PetscCall(MatGetRootType_Private(inmat,&rootType));
4484     PetscCall(MatSetType(*outmat,rootType));
4485     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4486     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4487     MatPreallocateEnd(dnz,onz);
4488     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4489   }
4490 
4491   /* numeric phase */
4492   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4493   for (i=0; i<m; i++) {
4494     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4495     Ii   = i + rstart;
4496     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4497     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4498   }
4499   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4500   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4501   PetscFunctionReturn(0);
4502 }
4503 
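/* Writes each rank's local rows of A (over all N global columns) as a sequential matrix to the
   binary file <outfile>.<rank>; a small splitting/IO helper. */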
4504 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4505 {
4506   PetscMPIInt       rank;
4507   PetscInt          m,N,i,rstart,nnz;
4508   size_t            len;
4509   const PetscInt    *indx;
4510   PetscViewer       out;
4511   char              *name;
4512   Mat               B;
4513   const PetscScalar *values;
4514 
4515   PetscFunctionBegin;
4516   PetscCall(MatGetLocalSize(A,&m,NULL));
4517   PetscCall(MatGetSize(A,NULL,&N));
4518   /* Should this be the type of the diagonal block of A? */
4519   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4520   PetscCall(MatSetSizes(B,m,N,m,N));
4521   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4522   PetscCall(MatSetType(B,MATSEQAIJ));
4523   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4524   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4525   for (i=0; i<m; i++) {
4526     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4527     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4528     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4529   }
4530   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4531   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4532 
4533   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4534   PetscCall(PetscStrlen(outfile,&len));
4535   PetscCall(PetscMalloc1(len+6,&name));
4536   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4537   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4538   PetscCall(PetscFree(name));
4539   PetscCall(MatView(B,out));
4540   PetscCall(PetscViewerDestroy(&out));
4541   PetscCall(MatDestroy(&B));
4542   PetscFunctionReturn(0);
4543 }
4544 
4545 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4546 {
4547   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4548 
4549   PetscFunctionBegin;
4550   if (!merge) PetscFunctionReturn(0);
4551   PetscCall(PetscFree(merge->id_r));
4552   PetscCall(PetscFree(merge->len_s));
4553   PetscCall(PetscFree(merge->len_r));
4554   PetscCall(PetscFree(merge->bi));
4555   PetscCall(PetscFree(merge->bj));
4556   PetscCall(PetscFree(merge->buf_ri[0]));
4557   PetscCall(PetscFree(merge->buf_ri));
4558   PetscCall(PetscFree(merge->buf_rj[0]));
4559   PetscCall(PetscFree(merge->buf_rj));
4560   PetscCall(PetscFree(merge->coi));
4561   PetscCall(PetscFree(merge->coj));
4562   PetscCall(PetscFree(merge->owners_co));
4563   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4564   PetscCall(PetscFree(merge));
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #include <../src/mat/utils/freespace.h>
4569 #include <petscbt.h>
4570 
4571 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4572 {
4573   MPI_Comm            comm;
4574   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4575   PetscMPIInt         size,rank,taga,*len_s;
4576   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4577   PetscInt            proc,m;
4578   PetscInt            **buf_ri,**buf_rj;
4579   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4580   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4581   MPI_Request         *s_waits,*r_waits;
4582   MPI_Status          *status;
4583   const MatScalar     *aa,*a_a;
4584   MatScalar           **abuf_r,*ba_i;
4585   Mat_Merge_SeqsToMPI *merge;
4586   PetscContainer      container;
4587 
4588   PetscFunctionBegin;
4589   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
4590   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
4591 
4592   PetscCallMPI(MPI_Comm_size(comm,&size));
4593   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4594 
4595   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
4596   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4597   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
4598   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4599   aa   = a_a;
4600 
4601   bi     = merge->bi;
4602   bj     = merge->bj;
4603   buf_ri = merge->buf_ri;
4604   buf_rj = merge->buf_rj;
4605 
4606   PetscCall(PetscMalloc1(size,&status));
4607   owners = merge->rowmap->range;
4608   len_s  = merge->len_s;
4609 
4610   /* send and recv matrix values */
4611   /*-----------------------------*/
4612   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
4613   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
4614 
4615   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
4616   for (proc=0,k=0; proc<size; proc++) {
4617     if (!len_s[proc]) continue;
4618     i    = owners[proc];
4619     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
4620     k++;
4621   }
4622 
4623   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
4624   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
4625   PetscCall(PetscFree(status));
4626 
4627   PetscCall(PetscFree(s_waits));
4628   PetscCall(PetscFree(r_waits));
4629 
4630   /* insert mat values of mpimat */
4631   /*----------------------------*/
4632   PetscCall(PetscMalloc1(N,&ba_i));
4633   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4634 
4635   for (k=0; k<merge->nrecv; k++) {
4636     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4637     nrows       = *(buf_ri_k[k]);
4638     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4639     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4640   }
4641 
4642   /* set values of ba */
4643   m    = merge->rowmap->n;
4644   for (i=0; i<m; i++) {
4645     arow = owners[rank] + i;
4646     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4647     bnzi = bi[i+1] - bi[i];
4648     PetscCall(PetscArrayzero(ba_i,bnzi));
4649 
4650     /* add local non-zero vals of this proc's seqmat into ba */
4651     anzi   = ai[arow+1] - ai[arow];
4652     aj     = a->j + ai[arow];
4653     aa     = a_a + ai[arow];
4654     nextaj = 0;
4655     for (j=0; nextaj<anzi; j++) {
4656       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4657         ba_i[j] += aa[nextaj++];
4658       }
4659     }
4660 
4661     /* add received vals into ba */
4662     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4663       /* i-th row */
4664       if (i == *nextrow[k]) {
4665         anzi   = *(nextai[k]+1) - *nextai[k];
4666         aj     = buf_rj[k] + *(nextai[k]);
4667         aa     = abuf_r[k] + *(nextai[k]);
4668         nextaj = 0;
4669         for (j=0; nextaj<anzi; j++) {
4670           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4671             ba_i[j] += aa[nextaj++];
4672           }
4673         }
4674         nextrow[k]++; nextai[k]++;
4675       }
4676     }
4677     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
4678   }
4679   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
4680   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
4681   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
4682 
4683   PetscCall(PetscFree(abuf_r[0]));
4684   PetscCall(PetscFree(abuf_r));
4685   PetscCall(PetscFree(ba_i));
4686   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4687   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
4688   PetscFunctionReturn(0);
4689 }
4690 
4691 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4692 {
4693   Mat                 B_mpi;
4694   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4695   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4696   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4697   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4698   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4699   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4700   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4701   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4702   MPI_Status          *status;
4703   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4704   PetscBT             lnkbt;
4705   Mat_Merge_SeqsToMPI *merge;
4706   PetscContainer      container;
4707 
4708   PetscFunctionBegin;
4709   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
4710 
4711   /* make sure it is a PETSc comm */
4712   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
4713   PetscCallMPI(MPI_Comm_size(comm,&size));
4714   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4715 
4716   PetscCall(PetscNew(&merge));
4717   PetscCall(PetscMalloc1(size,&status));
4718 
4719   /* determine row ownership */
4720   /*---------------------------------------------------------*/
4721   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
4722   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
4723   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
4724   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
4725   PetscCall(PetscLayoutSetUp(merge->rowmap));
4726   PetscCall(PetscMalloc1(size,&len_si));
4727   PetscCall(PetscMalloc1(size,&merge->len_s));
4728 
4729   m      = merge->rowmap->n;
4730   owners = merge->rowmap->range;
4731 
4732   /* determine the number of messages to send, their lengths */
4733   /*---------------------------------------------------------*/
4734   len_s = merge->len_s;
4735 
4736   len          = 0; /* length of buf_si[] */
4737   merge->nsend = 0;
4738   for (proc=0; proc<size; proc++) {
4739     len_si[proc] = 0;
4740     if (proc == rank) {
4741       len_s[proc] = 0;
4742     } else {
4743       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4744       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4745     }
4746     if (len_s[proc]) {
4747       merge->nsend++;
4748       nrows = 0;
4749       for (i=owners[proc]; i<owners[proc+1]; i++) {
4750         if (ai[i+1] > ai[i]) nrows++;
4751       }
4752       len_si[proc] = 2*(nrows+1);
4753       len         += len_si[proc];
4754     }
4755   }
4756 
4757   /* determine the number and length of messages to receive for ij-structure */
4758   /*-------------------------------------------------------------------------*/
4759   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
4760   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4761 
4762   /* post the Irecv of j-structure */
4763   /*-------------------------------*/
4764   PetscCall(PetscCommGetNewTag(comm,&tagj));
4765   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
4766 
4767   /* post the Isend of j-structure */
4768   /*--------------------------------*/
4769   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
4770 
4771   for (proc=0, k=0; proc<size; proc++) {
4772     if (!len_s[proc]) continue;
4773     i    = owners[proc];
4774     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
4775     k++;
4776   }
4777 
4778   /* receives and sends of j-structure are complete */
4779   /*------------------------------------------------*/
4780   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
4781   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
4782 
4783   /* send and recv i-structure */
4784   /*---------------------------*/
4785   PetscCall(PetscCommGetNewTag(comm,&tagi));
4786   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
4787 
4788   PetscCall(PetscMalloc1(len+1,&buf_s));
4789   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4790   for (proc=0,k=0; proc<size; proc++) {
4791     if (!len_s[proc]) continue;
4792     /* form outgoing message for i-structure:
4793          buf_si[0]:                 nrows to be sent
4794                [1:nrows]:           row index (global)
4795                [nrows+1:2*nrows+1]: i-structure index
4796     */
4797     /*-------------------------------------------*/
4798     nrows       = len_si[proc]/2 - 1;
4799     buf_si_i    = buf_si + nrows+1;
4800     buf_si[0]   = nrows;
4801     buf_si_i[0] = 0;
4802     nrows       = 0;
4803     for (i=owners[proc]; i<owners[proc+1]; i++) {
4804       anzi = ai[i+1] - ai[i];
4805       if (anzi) {
4806         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4807         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4808         nrows++;
4809       }
4810     }
4811     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
4812     k++;
4813     buf_si += len_si[proc];
4814   }
4815 
4816   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
4817   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
4818 
4819   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
4820   for (i=0; i<merge->nrecv; i++) {
4821     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
4822   }
4823 
4824   PetscCall(PetscFree(len_si));
4825   PetscCall(PetscFree(len_ri));
4826   PetscCall(PetscFree(rj_waits));
4827   PetscCall(PetscFree2(si_waits,sj_waits));
4828   PetscCall(PetscFree(ri_waits));
4829   PetscCall(PetscFree(buf_s));
4830   PetscCall(PetscFree(status));
4831 
4832   /* compute a local seq matrix in each processor */
4833   /*----------------------------------------------*/
4834   /* allocate bi array and free space for accumulating nonzero column info */
4835   PetscCall(PetscMalloc1(m+1,&bi));
4836   bi[0] = 0;
4837 
4838   /* create and initialize a linked list */
4839   nlnk = N+1;
4840   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
4841 
4842   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4843   len  = ai[owners[rank+1]] - ai[owners[rank]];
4844   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
4845 
4846   current_space = free_space;
4847 
4848   /* determine symbolic info for each local row */
4849   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4850 
4851   for (k=0; k<merge->nrecv; k++) {
4852     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4853     nrows       = *buf_ri_k[k];
4854     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4855     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4856   }
4857 
4858   MatPreallocateBegin(comm,m,n,dnz,onz);
4859   len  = 0;
4860   for (i=0; i<m; i++) {
4861     bnzi = 0;
4862     /* add local non-zero cols of this proc's seqmat into lnk */
4863     arow  = owners[rank] + i;
4864     anzi  = ai[arow+1] - ai[arow];
4865     aj    = a->j + ai[arow];
4866     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4867     bnzi += nlnk;
4868     /* add received col data into lnk */
4869     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4870       if (i == *nextrow[k]) { /* i-th row */
4871         anzi  = *(nextai[k]+1) - *nextai[k];
4872         aj    = buf_rj[k] + *nextai[k];
4873         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4874         bnzi += nlnk;
4875         nextrow[k]++; nextai[k]++;
4876       }
4877     }
4878     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4879 
4880     /* if free space is not available, make more free space */
4881     if (current_space->local_remaining<bnzi) {
4882       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
4883       nspacedouble++;
4884     }
4885     /* copy data into free space, then initialize lnk */
4886     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
4887     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4888 
4889     current_space->array           += bnzi;
4890     current_space->local_used      += bnzi;
4891     current_space->local_remaining -= bnzi;
4892 
4893     bi[i+1] = bi[i] + bnzi;
4894   }
4895 
4896   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4897 
4898   PetscCall(PetscMalloc1(bi[m]+1,&bj));
4899   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
4900   PetscCall(PetscLLDestroy(lnk,lnkbt));
4901 
4902   /* create symbolic parallel matrix B_mpi */
4903   /*---------------------------------------*/
4904   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
4905   PetscCall(MatCreate(comm,&B_mpi));
4906   if (n==PETSC_DECIDE) {
4907     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
4908   } else {
4909     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4910   }
4911   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
4912   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
4913   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4914   MatPreallocateEnd(dnz,onz);
4915   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
4916 
4917   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4918   B_mpi->assembled  = PETSC_FALSE;
4919   merge->bi         = bi;
4920   merge->bj         = bj;
4921   merge->buf_ri     = buf_ri;
4922   merge->buf_rj     = buf_rj;
4923   merge->coi        = NULL;
4924   merge->coj        = NULL;
4925   merge->owners_co  = NULL;
4926 
4927   PetscCall(PetscCommDestroy(&comm));
4928 
4929   /* attach the supporting struct to B_mpi for reuse */
4930   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
4931   PetscCall(PetscContainerSetPointer(container,merge));
4932   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
4933   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
4934   PetscCall(PetscContainerDestroy(&container));
4935   *mpimat = B_mpi;
4936 
4937   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 /*@C
4942       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4943                  matrices from each processor
4944 
4945     Collective
4946 
4947    Input Parameters:
4948 +    comm - the communicator the parallel matrix will live on
4949 .    seqmat - the input sequential matrix
4950 .    m - number of local rows (or PETSC_DECIDE)
4951 .    n - number of local columns (or PETSC_DECIDE)
4952 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4953 
4954    Output Parameter:
4955 .    mpimat - the parallel matrix generated
4956 
4957     Level: advanced
4958 
4959    Notes:
4960      The dimensions of the sequential matrix in each processor MUST be the same.
4961      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4962      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
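
     A minimal usage sketch (seqmat is assumed to be a MATSEQAIJ matrix of the same global size on
     every rank; the local row/column sizes are left to PETSc here):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve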
4963 @*/
4964 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4965 {
4966   PetscMPIInt    size;
4967 
4968   PetscFunctionBegin;
4969   PetscCallMPI(MPI_Comm_size(comm,&size));
4970   if (size == 1) {
4971     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4972     if (scall == MAT_INITIAL_MATRIX) {
4973       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4974     } else {
4975       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4976     }
4977     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4978     PetscFunctionReturn(0);
4979   }
4980   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4981   if (scall == MAT_INITIAL_MATRIX) {
4982     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4983   }
4984   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4985   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 /*@
4990      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4991           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4992           with MatGetSize().
4993 
4994     Not Collective
4995 
4996    Input Parameter:
4997 .    A - the matrix
4999 
5000    Output Parameter:
5001 .    A_loc - the local sequential matrix generated
5002 
5003     Level: developer
5004 
5005    Notes:
5006      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5007 
5008      Destroy the matrix with MatDestroy()
5009 
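   Example usage (a minimal sketch; A may be a MATSEQAIJ or MATMPIAIJ matrix):

.vb
     Mat Aloc;
     MatAIJGetLocalMat(A,&Aloc);
     /* ... use Aloc as a sequential matrix ... */
     MatDestroy(&Aloc);
.ve
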
5010 .seealso: `MatMPIAIJGetLocalMat()`
5011 
5012 @*/
5013 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5014 {
5015   PetscBool      mpi;
5016 
5017   PetscFunctionBegin;
5018   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5019   if (mpi) {
5020     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5021   } else {
5022     *A_loc = A;
5023     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5024   }
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 /*@
5029      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5030           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5031           with MatGetSize().
5032 
5033     Not Collective
5034 
5035    Input Parameters:
5036 +    A - the matrix
5037 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5038 
5039    Output Parameter:
5040 .    A_loc - the local sequential matrix generated
5041 
5042     Level: developer
5043 
5044    Notes:
5045      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5046 
5047      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5048      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5049      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5050      modify the values of the returned A_loc.
5051 
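   Example usage (a minimal sketch; A is an assembled MATMPIAIJ matrix):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... A's numerical values change, its nonzero pattern does not ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve
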
5052 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5053 @*/
5054 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5055 {
5056   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5057   Mat_SeqAIJ        *mat,*a,*b;
5058   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5059   const PetscScalar *aa,*ba,*aav,*bav;
5060   PetscScalar       *ca,*cam;
5061   PetscMPIInt       size;
5062   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5063   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5064   PetscBool         match;
5065 
5066   PetscFunctionBegin;
5067   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
5068   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5069   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5070   if (size == 1) {
5071     if (scall == MAT_INITIAL_MATRIX) {
5072       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5073       *A_loc = mpimat->A;
5074     } else if (scall == MAT_REUSE_MATRIX) {
5075       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
5076     }
5077     PetscFunctionReturn(0);
5078   }
5079 
5080   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5081   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5082   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5083   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5084   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
5085   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5086   aa   = aav;
5087   ba   = bav;
5088   if (scall == MAT_INITIAL_MATRIX) {
5089     PetscCall(PetscMalloc1(1+am,&ci));
5090     ci[0] = 0;
5091     for (i=0; i<am; i++) {
5092       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5093     }
5094     PetscCall(PetscMalloc1(1+ci[am],&cj));
5095     PetscCall(PetscMalloc1(1+ci[am],&ca));
5096     k    = 0;
5097     for (i=0; i<am; i++) {
5098       ncols_o = bi[i+1] - bi[i];
5099       ncols_d = ai[i+1] - ai[i];
5100       /* off-diagonal portion of A */
5101       for (jo=0; jo<ncols_o; jo++) {
5102         col = cmap[*bj];
5103         if (col >= cstart) break;
5104         cj[k]   = col; bj++;
5105         ca[k++] = *ba++;
5106       }
5107       /* diagonal portion of A */
5108       for (j=0; j<ncols_d; j++) {
5109         cj[k]   = cstart + *aj++;
5110         ca[k++] = *aa++;
5111       }
5112       /* off-diagonal portion of A */
5113       for (j=jo; j<ncols_o; j++) {
5114         cj[k]   = cmap[*bj++];
5115         ca[k++] = *ba++;
5116       }
5117     }
5118     /* put together the new matrix */
5119     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5120     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5121     /* Since these are PETSc arrays, change flags to free them as necessary. */
5122     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5123     mat->free_a  = PETSC_TRUE;
5124     mat->free_ij = PETSC_TRUE;
5125     mat->nonew   = 0;
5126   } else if (scall == MAT_REUSE_MATRIX) {
5127     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5128     ci   = mat->i;
5129     cj   = mat->j;
5130     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
5131     for (i=0; i<am; i++) {
5132       /* off-diagonal portion of A */
5133       ncols_o = bi[i+1] - bi[i];
5134       for (jo=0; jo<ncols_o; jo++) {
5135         col = cmap[*bj];
5136         if (col >= cstart) break;
5137         *cam++ = *ba++; bj++;
5138       }
5139       /* diagonal portion of A */
5140       ncols_d = ai[i+1] - ai[i];
5141       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5142       /* off-diagonal portion of A */
5143       for (j=jo; j<ncols_o; j++) {
5144         *cam++ = *ba++; bj++;
5145       }
5146     }
5147     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
5148   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5149   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
5150   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
5151   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5152   PetscFunctionReturn(0);
5153 }
5154 
5155 /*@
5156      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5157           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts.
5158 
5159     Not Collective
5160 
5161    Input Parameters:
5162 +    A - the matrix
5163 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5164 
5165    Output Parameters:
5166 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5167 -    A_loc - the local sequential matrix generated
5168 
5169     Level: developer
5170 
5171    Notes:
5172      This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering).
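
        Example Usage:
          A minimal usage sketch (error handling abbreviated); glob maps the local columns of A_loc back to the global columns of A
     .vb
          Mat A_loc;
          IS  glob;

          PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc));
          /* ... use A_loc and glob ... */
          PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_REUSE_MATRIX,NULL,&A_loc)); /* refresh the values after A changes */
          PetscCall(ISDestroy(&glob));
          PetscCall(MatDestroy(&A_loc));
     .ve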
5173 
5174 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5175 
5176 @*/
5177 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5178 {
5179   Mat            Ao,Ad;
5180   const PetscInt *cmap;
5181   PetscMPIInt    size;
5182   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5183 
5184   PetscFunctionBegin;
5185   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5186   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5187   if (size == 1) {
5188     if (scall == MAT_INITIAL_MATRIX) {
5189       PetscCall(PetscObjectReference((PetscObject)Ad));
5190       *A_loc = Ad;
5191     } else if (scall == MAT_REUSE_MATRIX) {
5192       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5193     }
5194     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5195     PetscFunctionReturn(0);
5196   }
5197   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5198   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5199   if (f) {
5200     PetscCall((*f)(A,scall,glob,A_loc));
5201   } else {
5202     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5203     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5204     Mat_SeqAIJ        *c;
5205     PetscInt          *ai = a->i, *aj = a->j;
5206     PetscInt          *bi = b->i, *bj = b->j;
5207     PetscInt          *ci,*cj;
5208     const PetscScalar *aa,*ba;
5209     PetscScalar       *ca;
5210     PetscInt          i,j,am,dn,on;
5211 
5212     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5213     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5214     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5215     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5216     if (scall == MAT_INITIAL_MATRIX) {
5217       PetscInt k;
5218       PetscCall(PetscMalloc1(1+am,&ci));
5219       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5220       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5221       ci[0] = 0;
5222       for (i=0,k=0; i<am; i++) {
5223         const PetscInt ncols_o = bi[i+1] - bi[i];
5224         const PetscInt ncols_d = ai[i+1] - ai[i];
5225         ci[i+1] = ci[i] + ncols_o + ncols_d;
5226         /* diagonal portion of A */
5227         for (j=0; j<ncols_d; j++,k++) {
5228           cj[k] = *aj++;
5229           ca[k] = *aa++;
5230         }
5231         /* off-diagonal portion of A */
5232         for (j=0; j<ncols_o; j++,k++) {
5233           cj[k] = dn + *bj++;
5234           ca[k] = *ba++;
5235         }
5236       }
5237       /* put together the new matrix */
5238       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5239       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5240       /* Since these are PETSc arrays, change flags to free them as necessary. */
5241       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5242       c->free_a  = PETSC_TRUE;
5243       c->free_ij = PETSC_TRUE;
5244       c->nonew   = 0;
5245       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5246     } else if (scall == MAT_REUSE_MATRIX) {
5247       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5248       for (i=0; i<am; i++) {
5249         const PetscInt ncols_d = ai[i+1] - ai[i];
5250         const PetscInt ncols_o = bi[i+1] - bi[i];
5251         /* diagonal portion of A */
5252         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5253         /* off-diagonal portion of A */
5254         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5255       }
5256       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5257     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5258     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5259     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5260     if (glob) {
5261       PetscInt cst, *gidx;
5262 
5263       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5264       PetscCall(PetscMalloc1(dn+on,&gidx));
5265       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5266       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5267       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5268     }
5269   }
5270   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5271   PetscFunctionReturn(0);
5272 }
5273 
5274 /*@C
5275      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5276 
5277     Not Collective
5278 
5279    Input Parameters:
5280 +    A - the matrix
5281 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5282 -    row, col - index sets of rows and columns to extract (or NULL)
5283 
5284    Output Parameter:
5285 .    A_loc - the local sequential matrix generated
5286 
5287     Level: developer
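
        Example Usage:
          A minimal usage sketch (error handling abbreviated) that extracts the local rows and nonzero columns of an assembled MATMPIAIJ matrix A
     .vb
          Mat A_loc;

          PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc));
          /* ... use A_loc ... */
          PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc)); /* refresh the values after A changes */
          PetscCall(MatDestroy(&A_loc));
     .ve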
5288 
5289 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5290 
5291 @*/
5292 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5293 {
5294   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5295   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5296   IS             isrowa,iscola;
5297   Mat            *aloc;
5298   PetscBool      match;
5299 
5300   PetscFunctionBegin;
5301   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5302   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5303   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5304   if (!row) {
5305     start = A->rmap->rstart; end = A->rmap->rend;
5306     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5307   } else {
5308     isrowa = *row;
5309   }
5310   if (!col) {
5311     start = A->cmap->rstart;
5312     cmap  = a->garray;
5313     nzA   = a->A->cmap->n;
5314     nzB   = a->B->cmap->n;
5315     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5316     ncols = 0;
5317     for (i=0; i<nzB; i++) {
5318       if (cmap[i] < start) idx[ncols++] = cmap[i];
5319       else break;
5320     }
5321     imark = i;
5322     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5323     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5324     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5325   } else {
5326     iscola = *col;
5327   }
5328   if (scall != MAT_INITIAL_MATRIX) {
5329     PetscCall(PetscMalloc1(1,&aloc));
5330     aloc[0] = *A_loc;
5331   }
5332   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5333   if (!col) { /* attach global id of condensed columns */
5334     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5335   }
5336   *A_loc = aloc[0];
5337   PetscCall(PetscFree(aloc));
5338   if (!row) {
5339     PetscCall(ISDestroy(&isrowa));
5340   }
5341   if (!col) {
5342     PetscCall(ISDestroy(&iscola));
5343   }
5344   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5345   PetscFunctionReturn(0);
5346 }
5347 
5348 /*
5349  * Create a sequential AIJ matrix based on row indices; when a row is matched, all of its columns are extracted.
5350  * Rows can be local or remote. The routine is designed to be scalable in memory, so nothing is sized
5351  * based on a global dimension.
5352  * */
5353 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5354 {
5355   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5356   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5357   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5358   PetscMPIInt              owner;
5359   PetscSFNode              *iremote,*oiremote;
5360   const PetscInt           *lrowindices;
5361   PetscSF                  sf,osf;
5362   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5363   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5364   MPI_Comm                 comm;
5365   ISLocalToGlobalMapping   mapping;
5366   const PetscScalar        *pd_a,*po_a;
5367 
5368   PetscFunctionBegin;
5369   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
5370   /* plocalsize is the number of roots
5371    * nrows is the number of leaves
5372    * */
5373   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
5374   PetscCall(ISGetLocalSize(rows,&nrows));
5375   PetscCall(PetscCalloc1(nrows,&iremote));
5376   PetscCall(ISGetIndices(rows,&lrowindices));
5377   for (i=0;i<nrows;i++) {
5378     /* Find a remote index and an owner for a row
5379      * The row could be local or remote
5380      * */
5381     owner = 0;
5382     lidx  = 0;
5383     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
5384     iremote[i].index = lidx;
5385     iremote[i].rank  = owner;
5386   }
5387   /* Create SF to communicate how many nonzero columns for each row */
5388   PetscCall(PetscSFCreate(comm,&sf));
5389   /* SF will figure out the number of nonzero columns for each row, and their
5390    * offsets
5391    * */
5392   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5393   PetscCall(PetscSFSetFromOptions(sf));
5394   PetscCall(PetscSFSetUp(sf));
5395 
5396   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
5397   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
5398   PetscCall(PetscCalloc1(nrows,&pnnz));
5399   roffsets[0] = 0;
5400   roffsets[1] = 0;
5401   for (i=0;i<plocalsize;i++) {
5402     /* diag */
5403     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5404     /* off diag */
5405     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5406     /* compute offsets so that we know the relative location of each row */
5407     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5408     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5409   }
5410   PetscCall(PetscCalloc1(2*nrows,&nlcols));
5411   PetscCall(PetscCalloc1(2*nrows,&loffsets));
5412   /* 'r' means root, and 'l' means leaf */
5413   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5414   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5415   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5416   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5417   PetscCall(PetscSFDestroy(&sf));
5418   PetscCall(PetscFree(roffsets));
5419   PetscCall(PetscFree(nrcols));
5420   dntotalcols = 0;
5421   ontotalcols = 0;
5422   ncol = 0;
5423   for (i=0;i<nrows;i++) {
5424     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5425     ncol = PetscMax(pnnz[i],ncol);
5426     /* diag */
5427     dntotalcols += nlcols[i*2+0];
5428     /* off diag */
5429     ontotalcols += nlcols[i*2+1];
5430   }
5431   /* We do not need to figure out the right number of columns
5432    * since all the calculations will be done by going through the raw data
5433    * */
5434   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
5435   PetscCall(MatSetUp(*P_oth));
5436   PetscCall(PetscFree(pnnz));
5437   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5438   /* diag */
5439   PetscCall(PetscCalloc1(dntotalcols,&iremote));
5440   /* off diag */
5441   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
5442   /* diag */
5443   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
5444   /* off diag */
5445   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
5446   dntotalcols = 0;
5447   ontotalcols = 0;
5448   ntotalcols  = 0;
5449   for (i=0;i<nrows;i++) {
5450     owner = 0;
5451     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
5452     /* Set iremote for diag matrix */
5453     for (j=0;j<nlcols[i*2+0];j++) {
5454       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5455       iremote[dntotalcols].rank    = owner;
5456       /* P_oth is SeqAIJ, so ilocal needs to point into its single contiguous arrays */
5457       ilocal[dntotalcols++]        = ntotalcols++;
5458     }
5459     /* off diag */
5460     for (j=0;j<nlcols[i*2+1];j++) {
5461       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5462       oiremote[ontotalcols].rank    = owner;
5463       oilocal[ontotalcols++]        = ntotalcols++;
5464     }
5465   }
5466   PetscCall(ISRestoreIndices(rows,&lrowindices));
5467   PetscCall(PetscFree(loffsets));
5468   PetscCall(PetscFree(nlcols));
5469   PetscCall(PetscSFCreate(comm,&sf));
5470   /* P serves as roots and P_oth is leaves
5471    * Diag matrix
5472    * */
5473   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5474   PetscCall(PetscSFSetFromOptions(sf));
5475   PetscCall(PetscSFSetUp(sf));
5476 
5477   PetscCall(PetscSFCreate(comm,&osf));
5478   /* Off diag */
5479   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
5480   PetscCall(PetscSFSetFromOptions(osf));
5481   PetscCall(PetscSFSetUp(osf));
5482   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5483   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5484   /* We operate on the matrix internal data for saving memory */
5485   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5486   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5487   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
5488   /* Convert to global indices for diag matrix */
5489   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5490   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5491   /* We want P_oth store global indices */
5492   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
5493   /* Use memory scalable approach */
5494   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
5495   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
5496   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5497   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5498   /* Convert back to local indices */
5499   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5500   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5501   nout = 0;
5502   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
5503   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5504   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5505   /* Exchange values */
5506   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5507   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5508   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5509   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5510   /* Stop PETSc from shrinking memory */
5511   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5512   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
5513   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
5514   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5515   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
5516   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
5517   PetscCall(PetscSFDestroy(&sf));
5518   PetscCall(PetscSFDestroy(&osf));
5519   PetscFunctionReturn(0);
5520 }
5521 
5522 /*
5523  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A.
5524  * This supports MPIAIJ and MAIJ matrices.
5525  * */
5526 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5527 {
5528   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5529   Mat_SeqAIJ            *p_oth;
5530   IS                    rows,map;
5531   PetscHMapI            hamp;
5532   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5533   MPI_Comm              comm;
5534   PetscSF               sf,osf;
5535   PetscBool             has;
5536 
5537   PetscFunctionBegin;
5538   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5539   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
5540   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5541    *  and then create a submatrix (that often is an overlapping matrix)
5542    * */
5543   if (reuse == MAT_INITIAL_MATRIX) {
5544     /* Use a hash table to figure out unique keys */
5545     PetscCall(PetscHMapICreate(&hamp));
5546     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
5547     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5548     count = 0;
5549     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5550     for (i=0;i<a->B->cmap->n;i++) {
5551       key  = a->garray[i]/dof;
5552       PetscCall(PetscHMapIHas(hamp,key,&has));
5553       if (!has) {
5554         mapping[i] = count;
5555         PetscCall(PetscHMapISet(hamp,key,count++));
5556       } else {
5557         /* Current 'i' has the same key as the previous step */
5558         mapping[i] = count-1;
5559       }
5560     }
5561     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
5562     PetscCall(PetscHMapIGetSize(hamp,&htsize));
5563     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5564     PetscCall(PetscCalloc1(htsize,&rowindices));
5565     off = 0;
5566     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
5567     PetscCall(PetscHMapIDestroy(&hamp));
5568     PetscCall(PetscSortInt(htsize,rowindices));
5569     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
5570     /* In case the matrix was already created but the user wants to recreate it */
5571     PetscCall(MatDestroy(P_oth));
5572     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
5573     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
5574     PetscCall(ISDestroy(&map));
5575     PetscCall(ISDestroy(&rows));
5576   } else if (reuse == MAT_REUSE_MATRIX) {
5577     /* If the matrix was already created, we simply update values using the SF objects
5578      * that were attached to the matrix earlier.
5579      */
5580     const PetscScalar *pd_a,*po_a;
5581 
5582     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
5583     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
5584     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5585     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5586     /* Update values in place */
5587     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5588     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5589     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5590     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5591     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5592     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5593     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5594     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5595   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5596   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
5597   PetscFunctionReturn(0);
5598 }
5599 
5600 /*@C
5601   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5602 
5603   Collective on Mat
5604 
5605   Input Parameters:
5606 + A - the first matrix in mpiaij format
5607 . B - the second matrix in mpiaij format
5608 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5609 
5610   Output Parameters:
5611 + rowb - on input, the index set of rows of B to extract (or NULL); modified on output
5612 . colb - on input, the index set of columns of B to extract (or NULL); modified on output
5613 - B_seq - the sequential matrix generated
5614 
5615   Level: developer
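
       Example Usage:
         A minimal usage sketch (error handling abbreviated); A and B are assumed to be assembled MATMPIAIJ matrices with compatible layouts
     .vb
         IS  rowb,colb;
         Mat B_seq;

         PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq));
         /* ... use B_seq ... */
         PetscCall(MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq)); /* refresh the values after B changes */
         PetscCall(ISDestroy(&rowb));
         PetscCall(ISDestroy(&colb));
         PetscCall(MatDestroy(&B_seq));
     .ve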
5616 
5617 @*/
5618 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5619 {
5620   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5621   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5622   IS             isrowb,iscolb;
5623   Mat            *bseq=NULL;
5624 
5625   PetscFunctionBegin;
5626   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5627     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5628   }
5629   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5630 
5631   if (scall == MAT_INITIAL_MATRIX) {
5632     start = A->cmap->rstart;
5633     cmap  = a->garray;
5634     nzA   = a->A->cmap->n;
5635     nzB   = a->B->cmap->n;
5636     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5637     ncols = 0;
5638     for (i=0; i<nzB; i++) {  /* row < local row index */
5639       if (cmap[i] < start) idx[ncols++] = cmap[i];
5640       else break;
5641     }
5642     imark = i;
5643     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5644     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5645     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5646     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5647   } else {
5648     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5649     isrowb  = *rowb; iscolb = *colb;
5650     PetscCall(PetscMalloc1(1,&bseq));
5651     bseq[0] = *B_seq;
5652   }
5653   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5654   *B_seq = bseq[0];
5655   PetscCall(PetscFree(bseq));
5656   if (!rowb) {
5657     PetscCall(ISDestroy(&isrowb));
5658   } else {
5659     *rowb = isrowb;
5660   }
5661   if (!colb) {
5662     PetscCall(ISDestroy(&iscolb));
5663   } else {
5664     *colb = iscolb;
5665   }
5666   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5667   PetscFunctionReturn(0);
5668 }
5669 
5670 /*
5671     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5672     of the OFF-DIAGONAL portion of the local A
5673 
5674     Collective on Mat
5675 
5676    Input Parameters:
5677 +    A,B - the matrices in mpiaij format
5678 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5679 
5680    Output Parameters:
5681 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5682 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5683 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5684 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5685 
5686     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5687      for this matrix. This is not desirable.
5688 
5689     Level: developer
5690 
5691 */
5692 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5693 {
5694   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5695   Mat_SeqAIJ             *b_oth;
5696   VecScatter             ctx;
5697   MPI_Comm               comm;
5698   const PetscMPIInt      *rprocs,*sprocs;
5699   const PetscInt         *srow,*rstarts,*sstarts;
5700   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5701   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5702   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5703   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5704   PetscMPIInt            size,tag,rank,nreqs;
5705 
5706   PetscFunctionBegin;
5707   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5708   PetscCallMPI(MPI_Comm_size(comm,&size));
5709 
5710   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5711     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5712   }
5713   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5714   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5715 
5716   if (size == 1) {
5717     startsj_s = NULL;
5718     bufa_ptr  = NULL;
5719     *B_oth    = NULL;
5720     PetscFunctionReturn(0);
5721   }
5722 
5723   ctx = a->Mvctx;
5724   tag = ((PetscObject)ctx)->tag;
5725 
5726   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5727   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5728   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5729   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5730   PetscCall(PetscMalloc1(nreqs,&reqs));
5731   rwaits = reqs;
5732   swaits = reqs + nrecvs;
5733 
5734   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5735   if (scall == MAT_INITIAL_MATRIX) {
5736     /* i-array */
5737     /*---------*/
5738     /*  post receives */
5739     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5740     for (i=0; i<nrecvs; i++) {
5741       rowlen = rvalues + rstarts[i]*rbs;
5742       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5743       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5744     }
5745 
5746     /* pack the outgoing message */
5747     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5748 
5749     sstartsj[0] = 0;
5750     rstartsj[0] = 0;
5751     len         = 0; /* total length of j or a array to be sent */
5752     if (nsends) {
5753       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5754       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5755     }
5756     for (i=0; i<nsends; i++) {
5757       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5758       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5759       for (j=0; j<nrows; j++) {
5760         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5761         for (l=0; l<sbs; l++) {
5762           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5763 
5764           rowlen[j*sbs+l] = ncols;
5765 
5766           len += ncols;
5767           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5768         }
5769         k++;
5770       }
5771       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5772 
5773       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5774     }
5775     /* recvs and sends of i-array are completed */
5776     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5777     PetscCall(PetscFree(svalues));
5778 
5779     /* allocate buffers for sending j and a arrays */
5780     PetscCall(PetscMalloc1(len+1,&bufj));
5781     PetscCall(PetscMalloc1(len+1,&bufa));
5782 
5783     /* create i-array of B_oth */
5784     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5785 
5786     b_othi[0] = 0;
5787     len       = 0; /* total length of j or a array to be received */
5788     k         = 0;
5789     for (i=0; i<nrecvs; i++) {
5790       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5791       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5792       for (j=0; j<nrows; j++) {
5793         b_othi[k+1] = b_othi[k] + rowlen[j];
5794         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5795         k++;
5796       }
5797       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5798     }
5799     PetscCall(PetscFree(rvalues));
5800 
5801     /* allocate space for j and a arrays of B_oth */
5802     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5803     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5804 
5805     /* j-array */
5806     /*---------*/
5807     /*  post receives of j-array */
5808     for (i=0; i<nrecvs; i++) {
5809       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5810       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5811     }
5812 
5813     /* pack the outgoing message j-array */
5814     if (nsends) k = sstarts[0];
5815     for (i=0; i<nsends; i++) {
5816       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5817       bufJ  = bufj+sstartsj[i];
5818       for (j=0; j<nrows; j++) {
5819         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5820         for (ll=0; ll<sbs; ll++) {
5821           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5822           for (l=0; l<ncols; l++) {
5823             *bufJ++ = cols[l];
5824           }
5825           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5826         }
5827       }
5828       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5829     }
5830 
5831     /* recvs and sends of j-array are completed */
5832     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5833   } else if (scall == MAT_REUSE_MATRIX) {
5834     sstartsj = *startsj_s;
5835     rstartsj = *startsj_r;
5836     bufa     = *bufa_ptr;
5837     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5838     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5839   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5840 
5841   /* a-array */
5842   /*---------*/
5843   /*  post receives of a-array */
5844   for (i=0; i<nrecvs; i++) {
5845     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5846     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5847   }
5848 
5849   /* pack the outgoing message a-array */
5850   if (nsends) k = sstarts[0];
5851   for (i=0; i<nsends; i++) {
5852     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5853     bufA  = bufa+sstartsj[i];
5854     for (j=0; j<nrows; j++) {
5855       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5856       for (ll=0; ll<sbs; ll++) {
5857         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5858         for (l=0; l<ncols; l++) {
5859           *bufA++ = vals[l];
5860         }
5861         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5862       }
5863     }
5864     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5865   }
5866   /* recvs and sends of a-array are completed */
5867   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5868   PetscCall(PetscFree(reqs));
5869 
5870   if (scall == MAT_INITIAL_MATRIX) {
5871     /* put together the new matrix */
5872     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5873 
5874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5875     /* Since these are PETSc arrays, change flags to free them as necessary. */
5876     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5877     b_oth->free_a  = PETSC_TRUE;
5878     b_oth->free_ij = PETSC_TRUE;
5879     b_oth->nonew   = 0;
5880 
5881     PetscCall(PetscFree(bufj));
5882     if (!startsj_s || !bufa_ptr) {
5883       PetscCall(PetscFree2(sstartsj,rstartsj));
5884       PetscCall(PetscFree(bufa_ptr));
5885     } else {
5886       *startsj_s = sstartsj;
5887       *startsj_r = rstartsj;
5888       *bufa_ptr  = bufa;
5889     }
5890   } else if (scall == MAT_REUSE_MATRIX) {
5891     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5892   }
5893 
5894   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5895   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5896   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5897   PetscFunctionReturn(0);
5898 }
5899 
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5903 #if defined(PETSC_HAVE_MKL_SPARSE)
5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5905 #endif
5906 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5908 #if defined(PETSC_HAVE_ELEMENTAL)
5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5910 #endif
5911 #if defined(PETSC_HAVE_SCALAPACK)
5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5913 #endif
5914 #if defined(PETSC_HAVE_HYPRE)
5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5916 #endif
5917 #if defined(PETSC_HAVE_CUDA)
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5919 #endif
5920 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5922 #endif
5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5924 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5925 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5926 
5927 /*
5928     Computes C = A*B as (B'*A')' since computing A*B directly with a parallel dense A is untenable
5929 
5930                n                       p                          p
5931         [             ]       [             ]         [                 ]
5932       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5933         [             ]       [             ]         [                 ]
5934 
5935 */
5936 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5937 {
5938   Mat            At,Bt,Ct;
5939 
5940   PetscFunctionBegin;
5941   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5942   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5943   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5944   PetscCall(MatDestroy(&At));
5945   PetscCall(MatDestroy(&Bt));
5946   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5947   PetscCall(MatDestroy(&Ct));
5948   PetscFunctionReturn(0);
5949 }
5950 
5951 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5952 {
5953   PetscBool      cisdense;
5954 
5955   PetscFunctionBegin;
5956   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5957   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5958   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5959   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5960   if (!cisdense) {
5961     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5962   }
5963   PetscCall(MatSetUp(C));
5964 
5965   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5966   PetscFunctionReturn(0);
5967 }
5968 
5969 /* ----------------------------------------------------------------*/
5970 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5971 {
5972   Mat_Product *product = C->product;
5973   Mat         A = product->A,B=product->B;
5974 
5975   PetscFunctionBegin;
5976   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5977     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5978 
5979   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5980   C->ops->productsymbolic = MatProductSymbolic_AB;
5981   PetscFunctionReturn(0);
5982 }
5983 
5984 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5985 {
5986   Mat_Product    *product = C->product;
5987 
5988   PetscFunctionBegin;
5989   if (product->type == MATPRODUCT_AB) {
5990     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5991   }
5992   PetscFunctionReturn(0);
5993 }
5994 
5995 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5996 
5997   Input Parameters:
5998 
5999     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6000     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6001 
6002     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6003 
6004     For Set1, j1[] contains column indices of the nonzeros.
6005     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6006     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6007     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6008 
6009     Similar for Set2.
6010 
6011     This routine merges the two sets of nonzeros row by row and removes repeats.
6012 
6013   Output Parameters: (memory is allocated by the caller)
6014 
6015     i[],j[]: the CSR of the merged matrix, which has m rows.
6016     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6017     imap2[]: similar to imap1[], but for Set2.
6018     Note we order nonzeros row-by-row and from left to right.
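
       Example (for illustration; one local row, i.e., m = 1):
         Set1: j1 = [1,1,4], rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]   (unique nonzeros: columns 1 and 4)
         Set2: j2 = [2,4,4], rowBegin2 = [0], rowEnd2 = [3], jmap2 = [0,1,3]   (unique nonzeros: columns 2 and 4)
        then,
         i = [0,3], j = [1,2,4], imap1 = [0,2], imap2 = [1,2]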
6019 */
6020 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6021   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6022   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6023 {
6024   PetscInt       r,m; /* Row index of mat */
6025   PetscCount     t,t1,t2,b1,e1,b2,e2;
6026 
6027   PetscFunctionBegin;
6028   PetscCall(MatGetLocalSize(mat,&m,NULL));
6029   t1   = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
6030   i[0] = 0;
6031   for (r=0; r<m; r++) { /* Do row by row merging */
6032     b1   = rowBegin1[r];
6033     e1   = rowEnd1[r];
6034     b2   = rowBegin2[r];
6035     e2   = rowEnd2[r];
6036     while (b1 < e1 && b2 < e2) {
6037       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6038         j[t]      = j1[b1];
6039         imap1[t1] = t;
6040         imap2[t2] = t;
6041         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6042         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6043         t1++; t2++; t++;
6044       } else if (j1[b1] < j2[b2]) {
6045         j[t]      = j1[b1];
6046         imap1[t1] = t;
6047         b1       += jmap1[t1+1] - jmap1[t1];
6048         t1++; t++;
6049       } else {
6050         j[t]      = j2[b2];
6051         imap2[t2] = t;
6052         b2       += jmap2[t2+1] - jmap2[t2];
6053         t2++; t++;
6054       }
6055     }
6056     /* Merge the remaining in either j1[] or j2[] */
6057     while (b1 < e1) {
6058       j[t]      = j1[b1];
6059       imap1[t1] = t;
6060       b1       += jmap1[t1+1] - jmap1[t1];
6061       t1++; t++;
6062     }
6063     while (b2 < e2) {
6064       j[t]      = j2[b2];
6065       imap2[t2] = t;
6066       b2       += jmap2[t2+1] - jmap2[t2];
6067       t2++; t++;
6068     }
6069     i[r+1] = t;
6070   }
6071   PetscFunctionReturn(0);
6072 }
6073 
6074 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6075 
6076   Input Parameters:
6077     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6078     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6079       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6080 
6081       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6082       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6083 
6084   Output Parameters:
6085     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6086     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6087       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6088       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6089 
6090     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6091       Atot: number of entries belonging to the diagonal block.
6092       Annz: number of unique nonzeros belonging to the diagonal block.
6093       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6094         repeats (i.e., same 'i,j' pair).
6095       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6096         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6100 
6101     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6102 
6103     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
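
       Example (for illustration): with m = 1 local row, local columns [cstart,cend) = [0,2), and n = 4 input entries
         i = [0,0,0,0], j = [3,0,0,2], perm = [0,1,2,3]
       the routine sorts the row so that j = [0,0,2,3] and produces
         rowBegin = [0], rowMid = [2], rowEnd = [4]
         Atot = 2, Annz = 1, Ajmap = [0,2], Aperm = {1,2}   (the two repeats of column 0; their order follows the sort)
         Btot = 2, Bnnz = 2, Bjmap = [0,1,2], Bperm = [3,0] (for columns 2 and 3)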
6104 */
6105 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6106   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6107   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6108   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6109 {
6110   PetscInt          cstart,cend,rstart,rend,row,col;
6111   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6112   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6113   PetscCount        k,m,p,q,r,s,mid;
6114   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6115 
6116   PetscFunctionBegin;
6117   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6118   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6119   m    = rend - rstart;
6120 
6121   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6122 
6123   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6124      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6125   */
6126   while (k<n) {
6127     row = i[k];
6128     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6129     for (s=k; s<n; s++) if (i[s] != row) break;
6130     for (p=k; p<s; p++) {
6131       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6132       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6133     }
6134     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6135     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6136     rowBegin[row-rstart] = k;
6137     rowMid[row-rstart]   = mid;
6138     rowEnd[row-rstart]   = s;
6139 
6140     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6141     Atot += mid - k;
6142     Btot += s - mid;
6143 
6144     /* Count unique nonzeros of this diag/offdiag row */
6145     for (p=k; p<mid;) {
6146       col = j[p];
6147       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6148       Annz++;
6149     }
6150 
6151     for (p=mid; p<s;) {
6152       col = j[p];
6153       do {p++;} while (p<s && j[p] == col);
6154       Bnnz++;
6155     }
6156     k = s;
6157   }
6158 
6159   /* Allocation according to Atot, Btot, Annz, Bnnz */
6160   PetscCall(PetscMalloc1(Atot,&Aperm));
6161   PetscCall(PetscMalloc1(Btot,&Bperm));
6162   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6163   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6164 
6165   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6166   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6167   for (r=0; r<m; r++) {
6168     k     = rowBegin[r];
6169     mid   = rowMid[r];
6170     s     = rowEnd[r];
6171     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6172     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6173     Atot += mid - k;
6174     Btot += s - mid;
6175 
6176     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6177     for (p=k; p<mid;) {
6178       col = j[p];
6179       q   = p;
6180       do {p++;} while (p<mid && j[p] == col);
6181       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6182       Annz++;
6183     }
6184 
6185     for (p=mid; p<s;) {
6186       col = j[p];
6187       q   = p;
6188       do {p++;} while (p<s && j[p] == col);
6189       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6190       Bnnz++;
6191     }
6192   }
6193   /* Output */
6194   *Aperm_ = Aperm;
6195   *Annz_  = Annz;
6196   *Atot_  = Atot;
6197   *Ajmap_ = Ajmap;
6198   *Bperm_ = Bperm;
6199   *Bnnz_  = Bnnz;
6200   *Btot_  = Btot;
6201   *Bjmap_ = Bjmap;
6202   PetscFunctionReturn(0);
6203 }
6204 
6205 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6206 
6207   Input Parameters:
6208     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6209     nnz:  number of unique nonzeros in the merged matrix
6210     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6211     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6212 
6213   Output Parameter: (memory is allocated by the caller)
6214     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6215 
6216   Example:
6217     nnz1 = 4
6218     nnz  = 6
6219     imap = [1,3,4,5]
6220     jmap = [0,3,5,6,7]
6221    then,
6222     jmap_new = [0,0,3,3,5,6,7]
6223 */
6224 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6225 {
6226   PetscCount k,p;
6227 
6228   PetscFunctionBegin;
6229   jmap_new[0] = 0;
6230   p = nnz; /* p loops over jmap_new[] backwards */
6231   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6232     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6233   }
6234   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6235   PetscFunctionReturn(0);
6236 }
6237 
6238 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
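     /* A minimal user-level sketch (error handling abbreviated) of the COO assembly path that reaches this routine.
        Here ncoo, coo_i[], coo_j[], and the values array v[] are assumed to be provided by the caller:

          PetscCall(MatSetPreallocationCOO(A,ncoo,coo_i,coo_j));   // communicate and analyze the nonzero pattern once
          PetscCall(MatSetValuesCOO(A,v,INSERT_VALUES));           // then set the values (use ADD_VALUES to accumulate); may be repeated
     */
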
6239 {
6240   MPI_Comm                  comm;
6241   PetscMPIInt               rank,size;
6242   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6243   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6244   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6245 
6246   PetscFunctionBegin;
6247   PetscCall(PetscFree(mpiaij->garray));
6248   PetscCall(VecDestroy(&mpiaij->lvec));
6249 #if defined(PETSC_USE_CTABLE)
6250   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6251 #else
6252   PetscCall(PetscFree(mpiaij->colmap));
6253 #endif
6254   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6255   mat->assembled = PETSC_FALSE;
6256   mat->was_assembled = PETSC_FALSE;
6257   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6258 
6259   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6260   PetscCallMPI(MPI_Comm_size(comm,&size));
6261   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6262   PetscCall(PetscLayoutSetUp(mat->rmap));
6263   PetscCall(PetscLayoutSetUp(mat->cmap));
6264   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6265   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6266   PetscCall(MatGetLocalSize(mat,&m,&n));
6267   PetscCall(MatGetSize(mat,&M,&N));
6268 
6269   /* ---------------------------------------------------------------------------*/
6270   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6271   /* entries come first, then local rows, then remote rows.                     */
6272   /* ---------------------------------------------------------------------------*/
6273   PetscCount n1 = coo_n,*perm1;
6274   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6275   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6276   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6277   PetscCall(PetscArraycpy(j1,coo_j,n1));
6278   for (k=0; k<n1; k++) perm1[k] = k;
6279 
6280   /* Manipulate indices so that entries with negative row or col indices will have smallest
6281      row indices, local entries will have greater but negative row indices, and remote entries
6282      will have positive row indices.
6283   */
6284   for (k=0; k<n1; k++) {
6285     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6286     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6287     else {
6288       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6289       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6290     }
6291   }
6292 
6293   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6294   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6295   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6296   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6297   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6298 
6299   /* ---------------------------------------------------------------------------*/
6300   /*           Split local rows into diag/offdiag portions                      */
6301   /* ---------------------------------------------------------------------------*/
6302   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6303   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6304   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6305 
6306   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6307   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6308   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6309 
6310   /* ---------------------------------------------------------------------------*/
6311   /*           Send remote rows to their owner                                  */
6312   /* ---------------------------------------------------------------------------*/
6313   /* Find which rows should be sent to which remote ranks*/
6314   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6315   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6316   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6317   const PetscInt *ranges;
6318   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6319 
6320   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6321   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6322   for (k=rem; k<n1;) {
6323     PetscMPIInt  owner;
6324     PetscInt     firstRow,lastRow;
6325 
6326     /* Locate a row range */
6327     firstRow = i1[k]; /* first row of this owner */
6328     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6329     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6330 
6331     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6332     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6333 
6334     /* All entries in [k,p) belong to this remote owner */
6335     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6336       PetscMPIInt *sendto2;
6337       PetscInt    *nentries2;
6338       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6339 
6340       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6341       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6342       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend));
6343       PetscCall(PetscFree2(sendto,nentries));
6344       sendto      = sendto2;
6345       nentries    = nentries2;
6346       maxNsend    = maxNsend2;
6347     }
6348     sendto[nsend]   = owner;
6349     nentries[nsend] = p - k;
6350     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6351     nsend++;
6352     k = p;
6353   }
6354 
6355   /* Build 1st SF to know offsets on remote to send data */
6356   PetscSF     sf1;
6357   PetscInt    nroots = 1,nroots2 = 0;
6358   PetscInt    nleaves = nsend,nleaves2 = 0;
6359   PetscInt    *offsets;
6360   PetscSFNode *iremote;
6361 
6362   PetscCall(PetscSFCreate(comm,&sf1));
6363   PetscCall(PetscMalloc1(nsend,&iremote));
6364   PetscCall(PetscMalloc1(nsend,&offsets));
6365   for (k=0; k<nsend; k++) {
6366     iremote[k].rank  = sendto[k];
6367     iremote[k].index = 0;
6368     nleaves2        += nentries[k];
6369     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6370   }
6371   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6372   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6373   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6374   PetscCall(PetscSFDestroy(&sf1));
6375   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6376 
6377   /* Build 2nd SF to send remote COOs to their owner */
6378   PetscSF sf2;
6379   nroots  = nroots2;
6380   nleaves = nleaves2;
6381   PetscCall(PetscSFCreate(comm,&sf2));
6382   PetscCall(PetscSFSetFromOptions(sf2));
6383   PetscCall(PetscMalloc1(nleaves,&iremote));
6384   p       = 0;
6385   for (k=0; k<nsend; k++) {
6386     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6387     for (q=0; q<nentries[k]; q++,p++) {
6388       iremote[p].rank  = sendto[k];
6389       iremote[p].index = offsets[k] + q;
6390     }
6391   }
6392   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6393 
6394   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6395   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6396 
6397   /* Send the remote COOs to their owner */
6398   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6399   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6400   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6401   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6402   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6403   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6404   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6405 
6406   PetscCall(PetscFree(offsets));
6407   PetscCall(PetscFree2(sendto,nentries));
6408 
6409   /* ---------------------------------------------------------------*/
6410   /* Sort received COOs by row along with the permutation array     */
6411   /* ---------------------------------------------------------------*/
6412   for (k=0; k<n2; k++) perm2[k] = k;
6413   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6414 
6415   /* ---------------------------------------------------------------*/
6416   /* Split received COOs into diag/offdiag portions                 */
6417   /* ---------------------------------------------------------------*/
6418   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6419   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6420   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6421 
6422   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6423   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6424 
6425   /* --------------------------------------------------------------------------*/
6426   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6427   /* --------------------------------------------------------------------------*/
6428   PetscInt   *Ai,*Bi;
6429   PetscInt   *Aj,*Bj;
6430 
6431   PetscCall(PetscMalloc1(m+1,&Ai));
6432   PetscCall(PetscMalloc1(m+1,&Bi));
6433   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6434   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6435 
6436   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6437   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6438   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6439   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6440   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6441 
6442   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6443   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6444 
6445   /* --------------------------------------------------------------------------*/
6446   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6447   /* expect most nonzeros in A/B to have local contributing entries            */
6448   /* --------------------------------------------------------------------------*/
6449   PetscInt Annz = Ai[m];
6450   PetscInt Bnnz = Bi[m];
6451   PetscCount *Ajmap1_new,*Bjmap1_new;
6452 
6453   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6454   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6455 
6456   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6457   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6458 
6459   PetscCall(PetscFree(Aimap1));
6460   PetscCall(PetscFree(Ajmap1));
6461   PetscCall(PetscFree(Bimap1));
6462   PetscCall(PetscFree(Bjmap1));
6463   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6464   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6465   PetscCall(PetscFree3(i1,j1,perm1));
6466   PetscCall(PetscFree3(i2,j2,perm2));
6467 
6468   Ajmap1 = Ajmap1_new;
6469   Bjmap1 = Bjmap1_new;
6470 
6471   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6472   if (Annz < Annz1 + Annz2) {
6473     PetscInt *Aj_new;
6474     PetscCall(PetscMalloc1(Annz,&Aj_new));
6475     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6476     PetscCall(PetscFree(Aj));
6477     Aj   = Aj_new;
6478   }
6479 
6480   if (Bnnz < Bnnz1 + Bnnz2) {
6481     PetscInt *Bj_new;
6482     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6483     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6484     PetscCall(PetscFree(Bj));
6485     Bj   = Bj_new;
6486   }
6487 
6488   /* --------------------------------------------------------------------------------*/
6489   /* Create new submatrices for on-process and off-process coupling                  */
6490   /* --------------------------------------------------------------------------------*/
6491   PetscScalar   *Aa,*Ba;
6492   MatType       rtype;
6493   Mat_SeqAIJ    *a,*b;
6494   PetscCall(PetscCalloc1(Annz,&Aa)); /* Allocate zeroed matrix values */
6495   PetscCall(PetscCalloc1(Bnnz,&Ba));
6496   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6497   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6498   PetscCall(MatDestroy(&mpiaij->A));
6499   PetscCall(MatDestroy(&mpiaij->B));
6500   PetscCall(MatGetRootType_Private(mat,&rtype));
6501   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6502   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6503   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6504 
6505   a = (Mat_SeqAIJ*)mpiaij->A->data;
6506   b = (Mat_SeqAIJ*)mpiaij->B->data;
6507   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let the sequential matrices own Ai,Aj,Aa,Bi,Bj,Ba */
6508   a->free_a       = b->free_a       = PETSC_TRUE;
6509   a->free_ij      = b->free_ij      = PETSC_TRUE;
6510 
6511   /* conversion must happen AFTER multiply setup */
6512   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6513   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6514   PetscCall(VecDestroy(&mpiaij->lvec));
6515   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6516   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6517 
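  /* Stash the COO machinery on the matrix for MatSetValuesCOO_MPIAIJ() below:
     Ajmap1[i]..Ajmap1[i+1] delimit the entries of Aperm1[] whose v[] values accumulate into the
     i-th nonzero of the diagonal block A (likewise Bjmap1/Bperm1 for B). The *2 counterparts do
     the same for entries received from other ranks, with Aimap2/Bimap2 mapping them to nonzeros
     of A/B and Aperm2/Bperm2 indexing the receive buffer. Cperm1 permutes v[] into the send buffer. */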
6518   mpiaij->coo_n   = coo_n;
6519   mpiaij->coo_sf  = sf2;
6520   mpiaij->sendlen = nleaves;
6521   mpiaij->recvlen = nroots;
6522 
6523   mpiaij->Annz    = Annz;
6524   mpiaij->Bnnz    = Bnnz;
6525 
6526   mpiaij->Annz2   = Annz2;
6527   mpiaij->Bnnz2   = Bnnz2;
6528 
6529   mpiaij->Atot1   = Atot1;
6530   mpiaij->Atot2   = Atot2;
6531   mpiaij->Btot1   = Btot1;
6532   mpiaij->Btot2   = Btot2;
6533 
6534   mpiaij->Ajmap1  = Ajmap1;
6535   mpiaij->Aperm1  = Aperm1;
6536 
6537   mpiaij->Bjmap1  = Bjmap1;
6538   mpiaij->Bperm1  = Bperm1;
6539 
6540   mpiaij->Aimap2  = Aimap2;
6541   mpiaij->Ajmap2  = Ajmap2;
6542   mpiaij->Aperm2  = Aperm2;
6543 
6544   mpiaij->Bimap2  = Bimap2;
6545   mpiaij->Bjmap2  = Bjmap2;
6546   mpiaij->Bperm2  = Bperm2;
6547 
6548   mpiaij->Cperm1  = Cperm1;
6549 
6550   /* Allocate send/recv buffers at preallocation time. If not used, they have essentially zero cost on the host */
6551   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6552   PetscFunctionReturn(0);
6553 }
6554 
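/* A rough sketch of the user-level COO assembly API that the machinery above serves
   (coo_i/coo_j/coo_v stand for caller-provided arrays of length ncoo):

     MatSetPreallocationCOO(A,ncoo,coo_i,coo_j);    builds the SF and the maps set up above
     MatSetValuesCOO(A,coo_v,ADD_VALUES);           only moves/accumulates values, see below

   MatSetValuesCOO() may be called repeatedly with new values for the same nonzero pattern. */
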
6555 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6556 {
6557   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6558   Mat                  A = mpiaij->A,B = mpiaij->B;
6559   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6560   PetscScalar          *Aa,*Ba;
6561   PetscScalar          *sendbuf = mpiaij->sendbuf;
6562   PetscScalar          *recvbuf = mpiaij->recvbuf;
6563   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6564   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6565   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6566   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6567 
6568   PetscFunctionBegin;
6569   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6570   PetscCall(MatSeqAIJGetArray(B,&Ba));
6571 
6572   /* Pack entries to be sent to remote */
6573   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6574 
6575   /* Send remote entries to their owner and overlap the communication with local computation */
6576   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6577   /* Add local entries to A and B */
6578   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6579     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6580     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6581     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6582   }
6583   for (PetscCount i=0; i<Bnnz; i++) {
6584     PetscScalar sum = 0.0;
6585     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6586     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6587   }
6588   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6589 
6590   /* Add received remote entries to A and B */
6591   for (PetscCount i=0; i<Annz2; i++) {
6592     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6593   }
6594   for (PetscCount i=0; i<Bnnz2; i++) {
6595     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6596   }
6597   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6598   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6599   PetscFunctionReturn(0);
6600 }
6601 
6602 /* ----------------------------------------------------------------*/
6603 
6604 /*MC
6605    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6606 
6607    Options Database Keys:
6608 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6609 
6610    Level: beginner
6611 
6612    Notes:
6613     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6614     in this case the values associated with the rows and columns one passes in are set to zero
6615     in the matrix.
6616 
6617     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6618     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6619 
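   A minimal creation sketch (illustrative; N stands for the global dimension chosen by the user):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
   /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
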
6620 .seealso: `MatCreateAIJ()`
6621 M*/
6622 
6623 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6624 {
6625   Mat_MPIAIJ     *b;
6626   PetscMPIInt    size;
6627 
6628   PetscFunctionBegin;
6629   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6630 
6631   PetscCall(PetscNewLog(B,&b));
6632   B->data       = (void*)b;
6633   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6634   B->assembled  = PETSC_FALSE;
6635   B->insertmode = NOT_SET_VALUES;
6636   b->size       = size;
6637 
6638   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6639 
6640   /* build cache for off array entries formed */
6641   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6642 
6643   b->donotstash  = PETSC_FALSE;
6644   b->colmap      = NULL;
6645   b->garray      = NULL;
6646   b->roworiented = PETSC_TRUE;
6647 
6648   /* stuff used for matrix vector multiply */
6649   b->lvec  = NULL;
6650   b->Mvctx = NULL;
6651 
6652   /* stuff for MatGetRow() */
6653   b->rowindices   = NULL;
6654   b->rowvalues    = NULL;
6655   b->getrowactive = PETSC_FALSE;
6656 
6657   /* flexible pointer used in CUSPARSE classes */
6658   b->spptr = NULL;
6659 
6660   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6661   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6663   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6664   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6667   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6669   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6670 #if defined(PETSC_HAVE_CUDA)
6671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6672 #endif
6673 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6674   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6675 #endif
6676 #if defined(PETSC_HAVE_MKL_SPARSE)
6677   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6678 #endif
6679   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6680   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6681   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6682   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6683 #if defined(PETSC_HAVE_ELEMENTAL)
6684   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6685 #endif
6686 #if defined(PETSC_HAVE_SCALAPACK)
6687   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6688 #endif
6689   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6690   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6691 #if defined(PETSC_HAVE_HYPRE)
6692   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6693   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6694 #endif
6695   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6696   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6697   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6698   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6699   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6700   PetscFunctionReturn(0);
6701 }
6702 
6703 /*@C
6704      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6705          and "off-diagonal" part of the matrix in CSR format.
6706 
6707    Collective
6708 
6709    Input Parameters:
6710 +  comm - MPI communicator
6711 .  m - number of local rows (Cannot be PETSC_DECIDE)
6712 .  n - This value should be the same as the local size used in creating the
6713        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6714        it calculated if N is given). For square matrices n is almost always m.
6715 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6716 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6717 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6718 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6719 .   a - matrix values
6720 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6721 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6722 -   oa - matrix values
6723 
6724    Output Parameter:
6725 .   mat - the matrix
6726 
6727    Level: advanced
6728 
6729    Notes:
6730        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6731        must free the arrays once the matrix has been destroyed and not before.
6732 
6733        The i and j indices are 0 based
6734 
6735        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6736 
6737        This sets local rows and cannot be used to set off-processor values.
6738 
6739        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6740        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6741        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6742        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6743        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6744        communication if it is known that only local entries will be set.
6745 
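       Example (an illustrative sketch, not taken from a test): consider a 4x4 matrix on 2 ranks where rank 0 owns
       rows 0-1 and columns 0-1, and rank 0's rows contain the global entries (0,0)=1, (0,1)=2, (0,3)=3, (1,1)=5.
       Then on rank 0 the "diagonal" block (local column indices) is given by
.vb
       i  = {0,2,3},  j  = {0,1,1},  a  = {1.,2.,5.}
.ve
       and the "off-diagonal" block (global column indices) by
.vb
       oi = {0,1,1},  oj = {3},      oa = {3.}
.ve
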
6746 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6747           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6748 @*/
6749 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6750 {
6751   Mat_MPIAIJ     *maij;
6752 
6753   PetscFunctionBegin;
6754   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6755   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6756   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6757   PetscCall(MatCreate(comm,mat));
6758   PetscCall(MatSetSizes(*mat,m,n,M,N));
6759   PetscCall(MatSetType(*mat,MATMPIAIJ));
6760   maij = (Mat_MPIAIJ*) (*mat)->data;
6761 
6762   (*mat)->preallocated = PETSC_TRUE;
6763 
6764   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6765   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6766 
6767   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6768   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6769 
6770   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6771   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6772   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6773   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6774   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6775   PetscFunctionReturn(0);
6776 }
6777 
6778 typedef struct {
6779   Mat       *mp;    /* intermediate products */
6780   PetscBool *mptmp; /* is the intermediate product temporary ? */
6781   PetscInt  cp;     /* number of intermediate products */
6782 
6783   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6784   PetscInt    *startsj_s,*startsj_r;
6785   PetscScalar *bufa;
6786   Mat         P_oth;
6787 
6788   /* may take advantage of merging product->B */
6789   Mat Bloc; /* B-local by merging diag and off-diag */
6790 
6791   /* cusparse does not support splitting the symbolic and numeric phases.
6792      When api_user is true, we don't need to update the numerical values
6793      of the temporary storage */
6794   PetscBool reusesym;
6795 
6796   /* support for COO values insertion */
6797   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6798   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6799   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6800   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6801   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6802   PetscMemType mtype;
6803 
6804   /* customization */
6805   PetscBool abmerge;
6806   PetscBool P_oth_bind;
6807 } MatMatMPIAIJBACKEND;
6808 
6809 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6810 {
6811   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6812   PetscInt            i;
6813 
6814   PetscFunctionBegin;
6815   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6816   PetscCall(PetscFree(mmdata->bufa));
6817   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6818   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6819   PetscCall(MatDestroy(&mmdata->P_oth));
6820   PetscCall(MatDestroy(&mmdata->Bloc));
6821   PetscCall(PetscSFDestroy(&mmdata->sf));
6822   for (i = 0; i < mmdata->cp; i++) {
6823     PetscCall(MatDestroy(&mmdata->mp[i]));
6824   }
6825   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6826   PetscCall(PetscFree(mmdata->own[0]));
6827   PetscCall(PetscFree(mmdata->own));
6828   PetscCall(PetscFree(mmdata->off[0]));
6829   PetscCall(PetscFree(mmdata->off));
6830   PetscCall(PetscFree(mmdata));
6831   PetscFunctionReturn(0);
6832 }
6833 
6834 /* Copy selected n entries with indices in idx[] of A to v[].
6835    If idx is NULL, copy the whole data array of A to v[]
6836  */
6837 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6838 {
6839   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6840 
6841   PetscFunctionBegin;
6842   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6843   if (f) {
6844     PetscCall((*f)(A,n,idx,v));
6845   } else {
6846     const PetscScalar *vv;
6847 
6848     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6849     if (n && idx) {
6850       PetscScalar    *w = v;
6851       const PetscInt *oi = idx;
6852       PetscInt       j;
6853 
6854       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6855     } else {
6856       PetscCall(PetscArraycpy(v,vv,n));
6857     }
6858     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6859   }
6860   PetscFunctionReturn(0);
6861 }
6862 
6863 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6864 {
6865   MatMatMPIAIJBACKEND *mmdata;
6866   PetscInt            i,n_d,n_o;
6867 
6868   PetscFunctionBegin;
6869   MatCheckProduct(C,1);
6870   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6871   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6872   if (!mmdata->reusesym) { /* update temporary matrices */
6873     if (mmdata->P_oth) {
6874       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6875     }
6876     if (mmdata->Bloc) {
6877       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6878     }
6879   }
6880   mmdata->reusesym = PETSC_FALSE;
6881 
6882   for (i = 0; i < mmdata->cp; i++) {
6883     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6884     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6885   }
6886   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6887     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6888 
6889     if (mmdata->mptmp[i]) continue;
6890     if (noff) {
6891       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6892 
6893       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6894       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6895       n_o += noff;
6896       n_d += nown;
6897     } else {
6898       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6899 
6900       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6901       n_d += mm->nz;
6902     }
6903   }
6904   if (mmdata->hasoffproc) { /* offprocess insertion */
6905     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6906     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6907   }
6908   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6909   PetscFunctionReturn(0);
6910 }
6911 
6912 /* Support for Pt * A, A * P, or Pt * A * P */
6913 #define MAX_NUMBER_INTERMEDIATE 4
6914 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6915 {
6916   Mat_Product            *product = C->product;
6917   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6918   Mat_MPIAIJ             *a,*p;
6919   MatMatMPIAIJBACKEND    *mmdata;
6920   ISLocalToGlobalMapping P_oth_l2g = NULL;
6921   IS                     glob = NULL;
6922   const char             *prefix;
6923   char                   pprefix[256];
6924   const PetscInt         *globidx,*P_oth_idx;
6925   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6926   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6927   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6928                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6929                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6930   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
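  /* For example, the A_diag*P_diag product below uses rmapt[]=cmapt[]=1 (consecutive rows/cols owned
     by C), while products against p->B or P_oth use type-2 column maps through p->garray or P_oth_idx. */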
6931 
6932   MatProductType         ptype;
6933   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6934   PetscMPIInt            size;
6935 
6936   PetscFunctionBegin;
6937   MatCheckProduct(C,1);
6938   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6939   ptype = product->type;
6940   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6941     ptype = MATPRODUCT_AB;
6942     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6943   }
6944   switch (ptype) {
6945   case MATPRODUCT_AB:
6946     A = product->A;
6947     P = product->B;
6948     m = A->rmap->n;
6949     n = P->cmap->n;
6950     M = A->rmap->N;
6951     N = P->cmap->N;
6952     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6953     break;
6954   case MATPRODUCT_AtB:
6955     P = product->A;
6956     A = product->B;
6957     m = P->cmap->n;
6958     n = A->cmap->n;
6959     M = P->cmap->N;
6960     N = A->cmap->N;
6961     hasoffproc = PETSC_TRUE;
6962     break;
6963   case MATPRODUCT_PtAP:
6964     A = product->A;
6965     P = product->B;
6966     m = P->cmap->n;
6967     n = P->cmap->n;
6968     M = P->cmap->N;
6969     N = P->cmap->N;
6970     hasoffproc = PETSC_TRUE;
6971     break;
6972   default:
6973     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6974   }
6975   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6976   if (size == 1) hasoffproc = PETSC_FALSE;
6977 
6978   /* defaults */
6979   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6980     mp[i]    = NULL;
6981     mptmp[i] = PETSC_FALSE;
6982     rmapt[i] = -1;
6983     cmapt[i] = -1;
6984     rmapa[i] = NULL;
6985     cmapa[i] = NULL;
6986   }
6987 
6988   /* customization */
6989   PetscCall(PetscNew(&mmdata));
6990   mmdata->reusesym = product->api_user;
6991   if (ptype == MATPRODUCT_AB) {
6992     if (product->api_user) {
6993       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6994       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6995       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6996       PetscOptionsEnd();
6997     } else {
6998       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6999       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7000       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7001       PetscOptionsEnd();
7002     }
7003   } else if (ptype == MATPRODUCT_PtAP) {
7004     if (product->api_user) {
7005       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7006       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7007       PetscOptionsEnd();
7008     } else {
7009       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7010       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7011       PetscOptionsEnd();
7012     }
7013   }
7014   a = (Mat_MPIAIJ*)A->data;
7015   p = (Mat_MPIAIJ*)P->data;
7016   PetscCall(MatSetSizes(C,m,n,M,N));
7017   PetscCall(PetscLayoutSetUp(C->rmap));
7018   PetscCall(PetscLayoutSetUp(C->cmap));
7019   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7020   PetscCall(MatGetOptionsPrefix(C,&prefix));
7021 
7022   cp   = 0;
7023   switch (ptype) {
7024   case MATPRODUCT_AB: /* A * P */
7025     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7026 
7027     /* A_diag * P_local (merged or not) */
7028     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7029       /* P is product->B */
7030       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7031       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7032       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7033       PetscCall(MatProductSetFill(mp[cp],product->fill));
7034       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7035       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7036       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7037       mp[cp]->product->api_user = product->api_user;
7038       PetscCall(MatProductSetFromOptions(mp[cp]));
7039       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7040       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7041       PetscCall(ISGetIndices(glob,&globidx));
7042       rmapt[cp] = 1;
7043       cmapt[cp] = 2;
7044       cmapa[cp] = globidx;
7045       mptmp[cp] = PETSC_FALSE;
7046       cp++;
7047     } else { /* A_diag * P_diag and A_diag * P_off */
7048       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7049       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7050       PetscCall(MatProductSetFill(mp[cp],product->fill));
7051       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7052       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7053       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7054       mp[cp]->product->api_user = product->api_user;
7055       PetscCall(MatProductSetFromOptions(mp[cp]));
7056       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7057       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7058       rmapt[cp] = 1;
7059       cmapt[cp] = 1;
7060       mptmp[cp] = PETSC_FALSE;
7061       cp++;
7062       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7063       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7064       PetscCall(MatProductSetFill(mp[cp],product->fill));
7065       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7066       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7067       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7068       mp[cp]->product->api_user = product->api_user;
7069       PetscCall(MatProductSetFromOptions(mp[cp]));
7070       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7071       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7072       rmapt[cp] = 1;
7073       cmapt[cp] = 2;
7074       cmapa[cp] = p->garray;
7075       mptmp[cp] = PETSC_FALSE;
7076       cp++;
7077     }
7078 
7079     /* A_off * P_other */
7080     if (mmdata->P_oth) {
7081       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7082       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7083       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7084       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7085       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7086       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7087       PetscCall(MatProductSetFill(mp[cp],product->fill));
7088       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7089       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7090       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7091       mp[cp]->product->api_user = product->api_user;
7092       PetscCall(MatProductSetFromOptions(mp[cp]));
7093       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7094       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7095       rmapt[cp] = 1;
7096       cmapt[cp] = 2;
7097       cmapa[cp] = P_oth_idx;
7098       mptmp[cp] = PETSC_FALSE;
7099       cp++;
7100     }
7101     break;
7102 
7103   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7104     /* A is product->B */
7105     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7106     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7107       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7108       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7109       PetscCall(MatProductSetFill(mp[cp],product->fill));
7110       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7111       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7112       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7113       mp[cp]->product->api_user = product->api_user;
7114       PetscCall(MatProductSetFromOptions(mp[cp]));
7115       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7116       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7117       PetscCall(ISGetIndices(glob,&globidx));
7118       rmapt[cp] = 2;
7119       rmapa[cp] = globidx;
7120       cmapt[cp] = 2;
7121       cmapa[cp] = globidx;
7122       mptmp[cp] = PETSC_FALSE;
7123       cp++;
7124     } else {
7125       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7126       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7127       PetscCall(MatProductSetFill(mp[cp],product->fill));
7128       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7129       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7130       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7131       mp[cp]->product->api_user = product->api_user;
7132       PetscCall(MatProductSetFromOptions(mp[cp]));
7133       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7134       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7135       PetscCall(ISGetIndices(glob,&globidx));
7136       rmapt[cp] = 1;
7137       cmapt[cp] = 2;
7138       cmapa[cp] = globidx;
7139       mptmp[cp] = PETSC_FALSE;
7140       cp++;
7141       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7142       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7143       PetscCall(MatProductSetFill(mp[cp],product->fill));
7144       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7145       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7146       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7147       mp[cp]->product->api_user = product->api_user;
7148       PetscCall(MatProductSetFromOptions(mp[cp]));
7149       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7150       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7151       rmapt[cp] = 2;
7152       rmapa[cp] = p->garray;
7153       cmapt[cp] = 2;
7154       cmapa[cp] = globidx;
7155       mptmp[cp] = PETSC_FALSE;
7156       cp++;
7157     }
7158     break;
7159   case MATPRODUCT_PtAP:
7160     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7161     /* P is product->B */
7162     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7163     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7164     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7165     PetscCall(MatProductSetFill(mp[cp],product->fill));
7166     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7167     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7168     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7169     mp[cp]->product->api_user = product->api_user;
7170     PetscCall(MatProductSetFromOptions(mp[cp]));
7171     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7172     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7173     PetscCall(ISGetIndices(glob,&globidx));
7174     rmapt[cp] = 2;
7175     rmapa[cp] = globidx;
7176     cmapt[cp] = 2;
7177     cmapa[cp] = globidx;
7178     mptmp[cp] = PETSC_FALSE;
7179     cp++;
7180     if (mmdata->P_oth) {
7181       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7182       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7183       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7184       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7185       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7186       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7187       PetscCall(MatProductSetFill(mp[cp],product->fill));
7188       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7189       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7190       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7191       mp[cp]->product->api_user = product->api_user;
7192       PetscCall(MatProductSetFromOptions(mp[cp]));
7193       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7194       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7195       mptmp[cp] = PETSC_TRUE;
7196       cp++;
7197       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7198       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7199       PetscCall(MatProductSetFill(mp[cp],product->fill));
7200       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7201       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7202       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7203       mp[cp]->product->api_user = product->api_user;
7204       PetscCall(MatProductSetFromOptions(mp[cp]));
7205       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7206       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7207       rmapt[cp] = 2;
7208       rmapa[cp] = globidx;
7209       cmapt[cp] = 2;
7210       cmapa[cp] = P_oth_idx;
7211       mptmp[cp] = PETSC_FALSE;
7212       cp++;
7213     }
7214     break;
7215   default:
7216     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7217   }
7218   /* sanity check */
7219   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7220 
7221   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7222   for (i = 0; i < cp; i++) {
7223     mmdata->mp[i]    = mp[i];
7224     mmdata->mptmp[i] = mptmp[i];
7225   }
7226   mmdata->cp = cp;
7227   C->product->data       = mmdata;
7228   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7229   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7230 
7231   /* memory type */
7232   mmdata->mtype = PETSC_MEMTYPE_HOST;
7233   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7234   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7235   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7236   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7237 
7238   /* prepare coo coordinates for values insertion */
7239 
7240   /* count total nonzeros of those intermediate seqaij Mats
7241     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7242     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7243     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7244   */
7245   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7246     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7247     if (mptmp[cp]) continue;
7248     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7249       const PetscInt *rmap = rmapa[cp];
7250       const PetscInt mr = mp[cp]->rmap->n;
7251       const PetscInt rs = C->rmap->rstart;
7252       const PetscInt re = C->rmap->rend;
7253       const PetscInt *ii  = mm->i;
7254       for (i = 0; i < mr; i++) {
7255         const PetscInt gr = rmap[i];
7256         const PetscInt nz = ii[i+1] - ii[i];
7257         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7258         else ncoo_oown += nz; /* this row is local */
7259       }
7260     } else ncoo_d += mm->nz;
7261   }
7262 
7263   /*
7264     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7265 
7266     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7267 
7268     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7269 
7270     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7271     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7272     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7273 
7274     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7275     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7276   */
7277   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7278   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7279 
7280   /* gather (i,j) of nonzeros inserted by remote procs */
7281   if (hasoffproc) {
7282     PetscSF  msf;
7283     PetscInt ncoo2,*coo_i2,*coo_j2;
7284 
7285     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7286     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7287     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7288 
7289     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7290       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7291       PetscInt   *idxoff = mmdata->off[cp];
7292       PetscInt   *idxown = mmdata->own[cp];
7293       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7294         const PetscInt *rmap = rmapa[cp];
7295         const PetscInt *cmap = cmapa[cp];
7296         const PetscInt *ii  = mm->i;
7297         PetscInt       *coi = coo_i + ncoo_o;
7298         PetscInt       *coj = coo_j + ncoo_o;
7299         const PetscInt mr = mp[cp]->rmap->n;
7300         const PetscInt rs = C->rmap->rstart;
7301         const PetscInt re = C->rmap->rend;
7302         const PetscInt cs = C->cmap->rstart;
7303         for (i = 0; i < mr; i++) {
7304           const PetscInt *jj = mm->j + ii[i];
7305           const PetscInt gr  = rmap[i];
7306           const PetscInt nz  = ii[i+1] - ii[i];
7307           if (gr < rs || gr >= re) { /* this is an offproc row */
7308             for (j = ii[i]; j < ii[i+1]; j++) {
7309               *coi++ = gr;
7310               *idxoff++ = j;
7311             }
7312             if (!cmapt[cp]) { /* already global */
7313               for (j = 0; j < nz; j++) *coj++ = jj[j];
7314             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7315               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7316             } else { /* offdiag */
7317               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7318             }
7319             ncoo_o += nz;
7320           } else { /* this is a local row */
7321             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7322           }
7323         }
7324       }
7325       mmdata->off[cp + 1] = idxoff;
7326       mmdata->own[cp + 1] = idxown;
7327     }
7328 
7329     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7330     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7331     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7332     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7333     ncoo = ncoo_d + ncoo_oown + ncoo2;
7334     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7335     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7336     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7337     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7338     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7339     PetscCall(PetscFree2(coo_i,coo_j));
7340     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7341     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7342     coo_i = coo_i2;
7343     coo_j = coo_j2;
7344   } else { /* no offproc values insertion */
7345     ncoo = ncoo_d;
7346     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7347 
7348     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7349     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7350     PetscCall(PetscSFSetUp(mmdata->sf));
7351   }
7352   mmdata->hasoffproc = hasoffproc;
7353 
7354   /* gather (i,j) of nonzeros inserted locally */
7355   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7356     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7357     PetscInt       *coi = coo_i + ncoo_d;
7358     PetscInt       *coj = coo_j + ncoo_d;
7359     const PetscInt *jj  = mm->j;
7360     const PetscInt *ii  = mm->i;
7361     const PetscInt *cmap = cmapa[cp];
7362     const PetscInt *rmap = rmapa[cp];
7363     const PetscInt mr = mp[cp]->rmap->n;
7364     const PetscInt rs = C->rmap->rstart;
7365     const PetscInt re = C->rmap->rend;
7366     const PetscInt cs = C->cmap->rstart;
7367 
7368     if (mptmp[cp]) continue;
7369     if (rmapt[cp] == 1) { /* consecutive rows */
7370       /* fill coo_i */
7371       for (i = 0; i < mr; i++) {
7372         const PetscInt gr = i + rs;
7373         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7374       }
7375       /* fill coo_j */
7376       if (!cmapt[cp]) { /* type-0, already global */
7377         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7378       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7379         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7380       } else { /* type-2, local to global for sparse columns */
7381         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7382       }
7383       ncoo_d += mm->nz;
7384     } else if (rmapt[cp] == 2) { /* sparse rows */
7385       for (i = 0; i < mr; i++) {
7386         const PetscInt *jj = mm->j + ii[i];
7387         const PetscInt gr  = rmap[i];
7388         const PetscInt nz  = ii[i+1] - ii[i];
7389         if (gr >= rs && gr < re) { /* local rows */
7390           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7391           if (!cmapt[cp]) { /* type-0, already global */
7392             for (j = 0; j < nz; j++) *coj++ = jj[j];
7393           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7394             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7395           } else { /* type-2, local to global for sparse columns */
7396             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7397           }
7398           ncoo_d += nz;
7399         }
7400       }
7401     }
7402   }
7403   if (glob) {
7404     PetscCall(ISRestoreIndices(glob,&globidx));
7405   }
7406   PetscCall(ISDestroy(&glob));
7407   if (P_oth_l2g) {
7408     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7409   }
7410   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7411   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7412   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7413 
7414   /* preallocate with COO data */
7415   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7416   PetscCall(PetscFree2(coo_i,coo_j));
7417   PetscFunctionReturn(0);
7418 }
7419 
7420 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7421 {
7422   Mat_Product *product = mat->product;
7423 #if defined(PETSC_HAVE_DEVICE)
7424   PetscBool    match   = PETSC_FALSE;
7425   PetscBool    usecpu  = PETSC_FALSE;
7426 #else
7427   PetscBool    match   = PETSC_TRUE;
7428 #endif
7429 
7430   PetscFunctionBegin;
7431   MatCheckProduct(mat,1);
7432 #if defined(PETSC_HAVE_DEVICE)
7433   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7434     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7435   }
7436   if (match) { /* we can always fallback to the CPU if requested */
7437     switch (product->type) {
7438     case MATPRODUCT_AB:
7439       if (product->api_user) {
7440         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7441         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7442         PetscOptionsEnd();
7443       } else {
7444         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7445         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7446         PetscOptionsEnd();
7447       }
7448       break;
7449     case MATPRODUCT_AtB:
7450       if (product->api_user) {
7451         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7452         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7453         PetscOptionsEnd();
7454       } else {
7455         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7456         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7457         PetscOptionsEnd();
7458       }
7459       break;
7460     case MATPRODUCT_PtAP:
7461       if (product->api_user) {
7462         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7463         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7464         PetscOptionsEnd();
7465       } else {
7466         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7467         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7468         PetscOptionsEnd();
7469       }
7470       break;
7471     default:
7472       break;
7473     }
7474     match = (PetscBool)!usecpu;
7475   }
7476 #endif
7477   if (match) {
7478     switch (product->type) {
7479     case MATPRODUCT_AB:
7480     case MATPRODUCT_AtB:
7481     case MATPRODUCT_PtAP:
7482       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7483       break;
7484     default:
7485       break;
7486     }
7487   }
7488   /* fallback to MPIAIJ ops */
7489   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7490   PetscFunctionReturn(0);
7491 }
7492 
7493 /*
7494    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7495 
7496    n - the number of block indices in cc[]
7497    cc - the block indices (must be large enough to contain the indices)
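
   For example (illustrative), with bs=2 a row whose column indices are {0,1,4,5,7} collapses to the block indices {0,2,3}.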
7498 */
7499 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7500 {
7501   PetscInt       cnt = -1,nidx,j;
7502   const PetscInt *idx;
7503 
7504   PetscFunctionBegin;
7505   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7506   if (nidx) {
7507     cnt = 0;
7508     cc[cnt] = idx[0]/bs;
7509     for (j=1; j<nidx; j++) {
7510       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7511     }
7512   }
7513   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7514   *n = cnt+1;
7515   PetscFunctionReturn(0);
7516 }
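
/*
   Worked example (illustrative only): with bs = 2 and a row whose sorted column indices are
   {0, 1, 4, 5, 6}, the divisions idx[j]/bs give 0, 0, 2, 2, 3, so MatCollapseRow() returns
   n = 3 and cc = {0, 2, 3}.  An empty row returns n = 0.  This relies on MatGetRow()
   returning the column indices in ascending order.
*/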
7517 
7518 /*
7519     Produces the merged, sorted block column indices of a block row (scalar rows start .. start+bs-1), one index for each block represented in the original set of rows
7520 
7521     ncollapsed - the number of block indices
7522     collapsed - the block indices (must be large enough to contain the indices)
7523 */
7524 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7525 {
7526   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7527 
7528   PetscFunctionBegin;
7529   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7530   for (i=start+1; i<start+bs; i++) {
7531     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7532     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7533     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7534   }
7535   *ncollapsed = nprev;
7536   if (collapsed) *collapsed  = cprev;
7537   PetscFunctionReturn(0);
7538 }
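
/*
   Worked example (illustrative only): with bs = 2, if MatCollapseRow() yields block columns
   {0, 2} for scalar row 'start' and {2, 3} for scalar row 'start'+1, PetscMergeIntArray()
   merges the two sorted lists into {0, 2, 3}, so the routine returns ncollapsed = 3 (and,
   if requested, collapsed = {0, 2, 3}).
*/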
7539 
7540 /* -------------------------------------------------------------------------- */
7541 /*
7542  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7543
7544  Input Parameters:
7545  + Amat - the matrix
7546  . symmetrize - make the result symmetric
7547  - scale - symmetrically scale with the diagonal
7548
7549  Output Parameter:
7550  . a_Gmat - output scalar graph (values >= 0)
7551 
7552  */
7553 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7554 {
7555   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7556   MPI_Comm       comm;
7557   Mat            Gmat;
7558   PetscBool      ismpiaij,isseqaij;
7559   Mat            a, b, c;
7560   MatType        jtype;
7561 
7562   PetscFunctionBegin;
7563   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7564   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7565   PetscCall(MatGetSize(Amat, &MM, &NN));
7566   PetscCall(MatGetBlockSize(Amat, &bs));
7567   nloc = (Iend-Istart)/bs;
7568 
7569   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7570   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7571   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7572 
7573   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7574   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7575      implementation */
7576   if (bs > 1) {
7577     PetscCall(MatGetType(Amat,&jtype));
7578     PetscCall(MatCreate(comm, &Gmat));
7579     PetscCall(MatSetType(Gmat, jtype));
7580     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7581     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7582     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7583       PetscInt  *d_nnz, *o_nnz;
7584       MatScalar *aa,val,AA[4096];
7585       PetscInt  *aj,*ai,AJ[4096],nc;
7586       if (isseqaij) { a = Amat; b = NULL; }
7587       else {
7588         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7589         a = d->A; b = d->B;
7590       }
7591       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7592       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7593       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7594         PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
7595         const PetscInt *cols;
7596         for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
7597           PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
7598           nnz[brow/bs] = jj/bs;
7599           if (jj%bs) ok = 0;
7600           if (cols) j0 = cols[0];
7601           else j0 = -1;
7602           PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
7603           if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
7604           for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
7605             PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
7606             if (jj%bs) ok = 0;
7607             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7608             if (nnz[brow/bs] != jj/bs) ok = 0;
7609             PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
7610           }
7611           if (!ok) {
7612             PetscCall(PetscFree2(d_nnz,o_nnz));
7613             goto old_bs;
7614           }
7615         }
7616         PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Static buffer of size 4096 too small: row needs %" PetscInt_FMT " blocks",nmax);
7617       }
7618       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7619       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7620       PetscCall(PetscFree2(d_nnz,o_nnz));
7621       // diag
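      /* Each bs x bs block is reduced to a single scalar: the sum of |Re(a_ij)| over the block
         entries (a cheap norm).  Illustrative example: with bs = 2 the block [[1,-2],[3,4]]
         contributes the value 1+2+3+4 = 10 to AA[]. */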
7622       for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
7623         Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
7624         ai = aseq->i;
7625         n  = ai[brow+1] - ai[brow];
7626         aj = aseq->j + ai[brow];
7627         for (int k=0; k<n; k += bs) { // block columns
7628           AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
7629           val = 0;
7630           for (int ii=0; ii<bs; ii++) { // rows in block
7631             aa = aseq->a + ai[brow+ii] + k;
7632             for (int jj=0; jj<bs; jj++) { // columns in block
7633               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7634             }
7635           }
7636           AA[k/bs] = val;
7637         }
7638         grow = Istart/bs + brow/bs;
7639         PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
7640       }
7641       // off-diag
7642       if (ismpiaij) {
7643         Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
7644         const PetscScalar *vals;
7645         const PetscInt    *cols, *garray = aij->garray;
7646         PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
7647         for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
7648           PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
7649           for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
7650             AA[k/bs] = 0;
7651             AJ[cidx] = garray[cols[k]]/bs;
7652           }
7653           nc = ncols/bs;
7654           PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
7655           for (int ii=0; ii<bs; ii++) { // rows in block
7656             PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
7657             for (int k=0; k<ncols; k += bs) {
7658               for (int jj=0; jj<bs; jj++) { // cols in block
7659                 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
7660               }
7661             }
7662             PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
7663           }
7664           grow = Istart/bs + brow/bs;
7665           PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
7666         }
7667       }
7668       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7669       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7670     } else {
7671       const PetscScalar *vals;
7672       const PetscInt    *idx;
7673       PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
7674       old_bs:
7675       /*
7676        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7677        */
7678       PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
7679       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7680       if (isseqaij) {
7681         PetscInt max_d_nnz;
7682         /*
7683          Determine exact preallocation count for (sequential) scalar matrix
7684          */
7685         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7686         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7687         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7688         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7689           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7690         }
7691         PetscCall(PetscFree3(w0,w1,w2));
7692       } else if (ismpiaij) {
7693         Mat            Daij,Oaij;
7694         const PetscInt *garray;
7695         PetscInt       max_d_nnz;
7696         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7697         /*
7698          Determine exact preallocation count for diagonal block portion of scalar matrix
7699          */
7700         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7701         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7702         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7703         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7704           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7705         }
7706         PetscCall(PetscFree3(w0,w1,w2));
7707         /*
7708          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
7709          */
7710         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7711           o_nnz[jj] = 0;
7712           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7713             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7714             o_nnz[jj] += ncols;
7715             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7716           }
7717           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7718         }
7719       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7720       /* get scalar copy (norms) of matrix */
7721       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7722       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7723       PetscCall(PetscFree2(d_nnz,o_nnz));
7724       for (Ii = Istart; Ii < Iend; Ii++) {
7725         PetscInt dest_row = Ii/bs;
7726         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7727         for (jj=0; jj<ncols; jj++) {
7728           PetscInt    dest_col = idx[jj]/bs;
7729           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7730           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7731         }
7732         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7733       }
7734       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7735       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7736     }
7737   } else {
7738     /* TODO GPU: optimization proposal, each class could provide a fast implementation of this
7739      procedure via a MatAbs API */
7740     /* just copy the scalar matrix and take absolute values of the entries */
7741     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7742     if (isseqaij) { a = Gmat; b = NULL; }
7743     else {
7744       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7745       a = d->A; b = d->B;
7746     }
7747     /* abs */
7748     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7749       MatInfo     info;
7750       PetscScalar *avals;
7751       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7752       PetscCall(MatSeqAIJGetArray(c,&avals));
7753       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7754       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7755     }
7756   }
7757   if (symmetrize) {
7758     PetscBool issym;
7759     PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
7760     if (!issym) {
7761       Mat matTrans;
7762       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7763       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7764       PetscCall(MatDestroy(&matTrans));
7765     }
7766     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7767   } else {
7768     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7769   }
7770   if (scale) {
7771     /* symmetrically scale Gmat so every diagonal value becomes 1 (or -1 where the diagonal entry is negative) */
7772     Vec               diag;
7773     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7774     PetscCall(MatGetDiagonal(Gmat, diag));
7775     PetscCall(VecReciprocal(diag));
7776     PetscCall(VecSqrtAbs(diag));
7777     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7778     PetscCall(VecDestroy(&diag));
7779   }
7780   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7781   *a_Gmat = Gmat;
7782   PetscFunctionReturn(0);
7783 }
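
/*
   Usage sketch (illustrative, not part of the build): building the scalar connection graph of a
   hypothetical block-size > 1 matrix A, symmetrized and diagonally scaled, as a coarsening
   heuristic might do:

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, &G));
     // ... coarsen or partition using G ...
     PetscCall(MatDestroy(&G));
*/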
7784 
7785 /* -------------------------------------------------------------------------- */
7786 /*@C
7787    MatFilter_AIJ - remove entries with small absolute values from the graph
7788      With vfilter < 0 nothing is filtered, so it should not be called in that case.
7789 
7790    Collective on Mat
7791 
7792    Input Parameters:
7793 +   Gmat - the graph
7794 -   vfilter - threshold parameter in [0,1)
7795 
7796    Output Parameter:
7797 .   filteredG - output filtered scalar graph
7798 
7799    Level: developer
7800 
7801    Notes:
7802     This is called before the graph coarseners are called.
7803     This could go into Mat; move 'symm' to GAMG
7804 
7805 .seealso: `PCGAMGSetThreshold()`
7806 @*/
7807 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7808 {
7809   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7810   Mat               tGmat;
7811   MPI_Comm          comm;
7812   const PetscScalar *vals;
7813   const PetscInt    *idx;
7814   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7815   MatScalar         *AA; // this is checked in graph
7816   PetscBool         isseqaij;
7817   Mat               a, b, c;
7818   MatType           jtype;
7819 
7820   PetscFunctionBegin;
7821   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7822   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7823   PetscCall(MatGetType(Gmat,&jtype));
7824   PetscCall(MatCreate(comm, &tGmat));
7825   PetscCall(MatSetType(tGmat, jtype));
7826 
7827   /* TODO GPU: this can be called even when filter = 0 -> probably provide a MatAIJThresholdCompress that compresses the entries below a threshold?
7828                Also, if the matrix is symmetric, can we skip this
7829                operation? It can be very expensive on large matrices. */
7830 
7831   // global sizes
7832   PetscCall(MatGetSize(Gmat, &MM, &NN));
7833   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7834   nloc = Iend - Istart;
7835   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7836   if (isseqaij) { a = Gmat; b = NULL; }
7837   else {
7838     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7839     a = d->A; b = d->B;
7840     garray = d->garray;
7841   }
7842   /* Determine upper bound on non-zeros needed in new filtered matrix */
7843   for (PetscInt row=0; row < nloc; row++) {
7844     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7845     d_nnz[row] = ncols;
7846     if (ncols>maxcols) maxcols=ncols;
7847     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7848   }
7849   if (b) {
7850     for (PetscInt row=0; row < nloc; row++) {
7851       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7852       o_nnz[row] = ncols;
7853       if (ncols>maxcols) maxcols=ncols;
7854       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7855     }
7856   }
7857   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7858   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7859   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7860   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7861   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7862   PetscCall(PetscFree2(d_nnz,o_nnz));
7863   //
7864   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7865   nnz0 = nnz1 = 0;
7866   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7867     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7868       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7869       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7870         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7871         if (PetscRealPart(sv) > vfilter) {
7872           nnz1++;
7873           PetscInt cid = idx[jj] + Istart; //diag
7874           if (c!=a) cid = garray[idx[jj]];
7875           AA[ncol_row] = vals[jj];
7876           AJ[ncol_row] = cid;
7877           ncol_row++;
7878         }
7879       }
7880       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7881       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7882     }
7883   }
7884   PetscCall(PetscFree2(AA,AJ));
7885   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7886   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7887   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7888 
7889   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7890                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7891                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7892 
7893   *filteredG = tGmat;
7894   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7895   PetscFunctionReturn(0);
7896 }
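
/*
   Usage sketch (illustrative, not part of the build): dropping weak connections from a graph G
   (for example one produced by MatCreateGraph_Simple_AIJ()), keeping only entries whose
   absolute value exceeds 0.01; the caller owns both matrices:

     Mat Gfilt;
     PetscCall(MatFilter_AIJ(G, 0.01, &Gfilt));
     PetscCall(MatDestroy(&G));
*/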
7897 
7898 /*
7899     Special version for direct calls from Fortran
7900 */
7901 #include <petsc/private/fortranimpl.h>
7902 
7903 /* Change these macros so they can be used in a void function */
7904 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7905 #undef  PetscCall
7906 #define PetscCall(...) do {                                                                    \
7907     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7908     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7909       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7910       return;                                                                                  \
7911     }                                                                                          \
7912   } while (0)
7913 
7914 #undef SETERRQ
7915 #define SETERRQ(comm,ierr,...) do {                                                            \
7916     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7917     return;                                                                                    \
7918   } while (0)
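
/*
   With these redefinitions a statement such as

     PetscCall(MatSeqAIJGetArray(A,&aa));

   inside the void Fortran stub matsetvaluesmpiaij_() below assigns any nonzero error code to
   *_ierr and returns, instead of returning a PetscErrorCode as the standard macro would.
*/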
7919 
7920 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7921 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7922 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7923 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7924 #else
7925 #endif
7926 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7927 {
7928   Mat          mat  = *mmat;
7929   PetscInt     m    = *mm, n = *mn;
7930   InsertMode   addv = *maddv;
7931   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7932   PetscScalar  value;
7933 
7934   MatCheckPreallocated(mat,1);
7935   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7936   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7937   {
7938     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7939     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7940     PetscBool roworiented = aij->roworiented;
7941 
7942     /* Some Variables required in the macro */
7943     Mat        A                    = aij->A;
7944     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7945     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7946     MatScalar  *aa;
7947     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7948     Mat        B                    = aij->B;
7949     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7950     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7951     MatScalar  *ba;
7952     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7953      * cannot use "#if defined" inside a macro. */
7954     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7955 
7956     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7957     PetscInt  nonew = a->nonew;
7958     MatScalar *ap1,*ap2;
7959 
7960     PetscFunctionBegin;
7961     PetscCall(MatSeqAIJGetArray(A,&aa));
7962     PetscCall(MatSeqAIJGetArray(B,&ba));
7963     for (i=0; i<m; i++) {
7964       if (im[i] < 0) continue;
7965       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7966       if (im[i] >= rstart && im[i] < rend) {
7967         row      = im[i] - rstart;
7968         lastcol1 = -1;
7969         rp1      = aj + ai[row];
7970         ap1      = aa + ai[row];
7971         rmax1    = aimax[row];
7972         nrow1    = ailen[row];
7973         low1     = 0;
7974         high1    = nrow1;
7975         lastcol2 = -1;
7976         rp2      = bj + bi[row];
7977         ap2      = ba + bi[row];
7978         rmax2    = bimax[row];
7979         nrow2    = bilen[row];
7980         low2     = 0;
7981         high2    = nrow2;
7982 
7983         for (j=0; j<n; j++) {
7984           if (roworiented) value = v[i*n+j];
7985           else value = v[i+j*m];
7986           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7987           if (in[j] >= cstart && in[j] < cend) {
7988             col = in[j] - cstart;
7989             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7990           } else if (in[j] < 0) continue;
7991           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7992             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7993             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
7994           } else {
7995             if (mat->was_assembled) {
7996               if (!aij->colmap) {
7997                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
7998               }
7999 #if defined(PETSC_USE_CTABLE)
8000               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
8001               col--;
8002 #else
8003               col = aij->colmap[in[j]] - 1;
8004 #endif
8005               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
8006                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8007                 col  =  in[j];
8008                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8009                 B        = aij->B;
8010                 b        = (Mat_SeqAIJ*)B->data;
8011                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
8012                 rp2      = bj + bi[row];
8013                 ap2      = ba + bi[row];
8014                 rmax2    = bimax[row];
8015                 nrow2    = bilen[row];
8016                 low2     = 0;
8017                 high2    = nrow2;
8018                 bm       = aij->B->rmap->n;
8019                 ba       = b->a;
8020                 inserted = PETSC_FALSE;
8021               }
8022             } else col = in[j];
8023             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
8024           }
8025         }
8026       } else if (!aij->donotstash) {
8027         if (roworiented) {
8028           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8029         } else {
8030           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8031         }
8032       }
8033     }
8034     PetscCall(MatSeqAIJRestoreArray(A,&aa));
8035     PetscCall(MatSeqAIJRestoreArray(B,&ba));
8036   }
8037   PetscFunctionReturnVoid();
8038 }
8039 
8040 /* Undefining these here since they were redefined from their original definition above! No
8041  * other PETSc functions should be defined past this point, as it is impossible to recover the
8042  * original definitions */
8043 #undef PetscCall
8044 #undef SETERRQ
8045