#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity, as sketched in the comment following this block.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/
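
/*
   A small usage sketch (illustrative only, not taken from the manual page above): create an AIJ
   matrix and call both preallocation routines so the same code runs on one or many MPI ranks.
   The global sizes M, N and the per-row counts nz, dnz, onz are placeholders chosen by the caller;
   the preallocation call that does not match the actual matrix type is simply ignored.

.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
   PetscCall(MatSetType(A,MATAIJ));
   PetscCall(MatSetFromOptions(A));
   PetscCall(MatSeqAIJSetPreallocation(A,nz,NULL));
   PetscCall(MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL));
.ve
*/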

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));

  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool         cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
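  /* Get and immediately restore the value arrays without using them: this forces any
     device-resident values (GPU subclasses) to be copied to the host before a_aij->a and
     b_aij->a are read directly below */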
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n    = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));

  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
at a slightly higher hash-table lookup cost; without it, it is not scalable
(each process stores an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

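/*
   The two macros below insert one value into the diagonal (A) or off-diagonal (B) sequential block.
   They first narrow the search window [low,high) with a short binary search, then scan linearly for
   the column, and either add to / overwrite an existing entry or, if insertion of new nonzeros is
   allowed, grow the row via MatSeqXAIJReallocateAIJ() and shift the later entries of the row up to
   make room for the new column.
*/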
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether PetscLogFlops() will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */
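  /* v[] is expected to hold the complete local row in ascending global column order:
     the off-diagonal entries to the left of the diagonal block, then the diagonal block,
     then the remaining off-diagonal entries */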

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt   nonew;
  MatScalar  *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be PETSC_FALSE; otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart
    would not be valid and the more general MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
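    /* Receive the entries that other ranks stashed for this rank during MatSetValues() and insert
       them, one contiguous run of identical rows per MatSetValues_MPIAIJ() call */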
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
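  /* Start communicating the needed off-process entries of xx into a->lvec, overlap that with the
     product against the local diagonal block, then finish the scatter and add the contribution of
     the off-diagonal block */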
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths  */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
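  /* For each row the indices are emitted in ascending global order: off-diagonal entries whose
     global column lies below the diagonal block, then the diagonal block (shifted by cs), then the
     remaining off-diagonal entries */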
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1339       } else {
1340         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1341       }
1342       PetscFunctionReturn(0);
1343     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1344       PetscFunctionReturn(0);
1345     }
1346   } else if (isbinary) {
1347     if (size == 1) {
1348       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1349       PetscCall(MatView(aij->A,viewer));
1350     } else {
1351       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
1352     }
1353     PetscFunctionReturn(0);
1354   } else if (iascii && size == 1) {
1355     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1356     PetscCall(MatView(aij->A,viewer));
1357     PetscFunctionReturn(0);
1358   } else if (isdraw) {
1359     PetscDraw draw;
1360     PetscBool isnull;
1361     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1362     PetscCall(PetscDrawIsNull(draw,&isnull));
1363     if (isnull) PetscFunctionReturn(0);
1364   }
1365 
1366   { /* assemble the entire matrix onto first processor */
1367     Mat A = NULL, Av;
1368     IS  isrow,iscol;
1369 
1370     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1371     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1372     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
1373     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
1374 /*  The commented code uses MatCreateSubMatrices instead */
1375 /*
1376     Mat *AA, A = NULL, Av;
1377     IS  isrow,iscol;
1378 
1379     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1380     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1381     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1382     if (rank == 0) {
1383        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1384        A    = AA[0];
1385        Av   = AA[0];
1386     }
1387     PetscCall(MatDestroySubMatrices(1,&AA));
1388 */
1389     PetscCall(ISDestroy(&iscol));
1390     PetscCall(ISDestroy(&isrow));
1391     /*
1392        Every process has to participate in viewing the matrix, since the graphics waits are
1393        synchronized across all processes that share the PetscDraw object
1394     */
1395     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1396     if (rank == 0) {
1397       if (((PetscObject)mat)->name) {
1398         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
1399       }
1400       PetscCall(MatView_SeqAIJ(Av,sviewer));
1401     }
1402     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1403     PetscCall(PetscViewerFlush(viewer));
1404     PetscCall(MatDestroy(&A));
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1410 {
1411   PetscBool      iascii,isdraw,issocket,isbinary;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1418   if (iascii || isdraw || isbinary || issocket) {
1419     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1425 {
1426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1427   Vec            bb1 = NULL;
1428   PetscBool      hasop;
1429 
1430   PetscFunctionBegin;
1431   if (flag == SOR_APPLY_UPPER) {
1432     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1433     PetscFunctionReturn(0);
1434   }
1435 
1436   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* ~flag & SOR_ZERO_INITIAL_GUESS is nonzero when the zero-initial-guess bit is not set */
1437     PetscCall(VecDuplicate(bb,&bb1));
1438   }
1439 
1440   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1441     if (flag & SOR_ZERO_INITIAL_GUESS) {
1442       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1443       its--;
1444     }
1445 
1446     while (its--) {
1447       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1448       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1449 
1450       /* update rhs: bb1 = bb - B*x */
1451       PetscCall(VecScale(mat->lvec,-1.0));
1452       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1453 
1454       /* local sweep */
1455       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
1456     }
1457   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1458     if (flag & SOR_ZERO_INITIAL_GUESS) {
1459       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1460       its--;
1461     }
1462     while (its--) {
1463       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1464       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1465 
1466       /* update rhs: bb1 = bb - B*x */
1467       PetscCall(VecScale(mat->lvec,-1.0));
1468       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1469 
1470       /* local sweep */
1471       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
1472     }
1473   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1474     if (flag & SOR_ZERO_INITIAL_GUESS) {
1475       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1476       its--;
1477     }
1478     while (its--) {
1479       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1480       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1481 
1482       /* update rhs: bb1 = bb - B*x */
1483       PetscCall(VecScale(mat->lvec,-1.0));
1484       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1485 
1486       /* local sweep */
1487       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
1488     }
1489   } else if (flag & SOR_EISENSTAT) {
1490     Vec xx1;
1491 
1492     PetscCall(VecDuplicate(bb,&xx1));
1493     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1494 
1495     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1496     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1497     if (!mat->diag) {
1498       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
1499       PetscCall(MatGetDiagonal(matin,mat->diag));
1500     }
1501     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1502     if (hasop) {
1503       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1504     } else {
1505       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1506     }
1507     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1508 
1509     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1510 
1511     /* local sweep */
1512     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
1513     PetscCall(VecAXPY(xx,1.0,xx1));
1514     PetscCall(VecDestroy(&xx1));
1515   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1516 
1517   PetscCall(VecDestroy(&bb1));
1518 
1519   matin->factorerrortype = mat->A->factorerrortype;
1520   PetscFunctionReturn(0);
1521 }
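
/*
   In outline, the local sweeps above implement the following iteration (A_d is the diagonal block,
   B_o the off-diagonal block, and x_ghost the scattered ghost values; this notation is introduced
   here only for the summary):

       bb1 = bb - B_o * x_ghost               (VecScatter, VecScale by -1, MatMultAdd)
       SOR(A_d, bb1, omega, fshift) -> xx     (local forward/backward/symmetric sweep)
*/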
1522 
1523 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1524 {
1525   Mat            aA,aB,Aperm;
1526   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1527   PetscScalar    *aa,*ba;
1528   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1529   PetscSF        rowsf,sf;
1530   IS             parcolp = NULL;
1531   PetscBool      done;
1532 
1533   PetscFunctionBegin;
1534   PetscCall(MatGetLocalSize(A,&m,&n));
1535   PetscCall(ISGetIndices(rowp,&rwant));
1536   PetscCall(ISGetIndices(colp,&cwant));
1537   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
1538 
1539   /* Invert row permutation to find out where my rows should go */
1540   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
1541   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
1542   PetscCall(PetscSFSetFromOptions(rowsf));
1543   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1544   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1545   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1546 
1547   /* Invert column permutation to find out where my columns should go */
1548   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1549   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
1550   PetscCall(PetscSFSetFromOptions(sf));
1551   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1552   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1553   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1554   PetscCall(PetscSFDestroy(&sf));
1555 
1556   PetscCall(ISRestoreIndices(rowp,&rwant));
1557   PetscCall(ISRestoreIndices(colp,&cwant));
1558   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
1559 
1560   /* Find out where my gcols should go */
1561   PetscCall(MatGetSize(aB,NULL,&ng));
1562   PetscCall(PetscMalloc1(ng,&gcdest));
1563   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1564   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
1565   PetscCall(PetscSFSetFromOptions(sf));
1566   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1568   PetscCall(PetscSFDestroy(&sf));
1569 
1570   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
1571   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1572   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1573   for (i=0; i<m; i++) {
1574     PetscInt    row = rdest[i];
1575     PetscMPIInt rowner;
1576     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
1577     for (j=ai[i]; j<ai[i+1]; j++) {
1578       PetscInt    col = cdest[aj[j]];
1579       PetscMPIInt cowner;
1580       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
1581       if (rowner == cowner) dnnz[i]++;
1582       else onnz[i]++;
1583     }
1584     for (j=bi[i]; j<bi[i+1]; j++) {
1585       PetscInt    col = gcdest[bj[j]];
1586       PetscMPIInt cowner;
1587       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
1588       if (rowner == cowner) dnnz[i]++;
1589       else onnz[i]++;
1590     }
1591   }
1592   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1593   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1594   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1595   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1596   PetscCall(PetscSFDestroy(&rowsf));
1597 
1598   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
1599   PetscCall(MatSeqAIJGetArray(aA,&aa));
1600   PetscCall(MatSeqAIJGetArray(aB,&ba));
1601   for (i=0; i<m; i++) {
1602     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1603     PetscInt j0,rowlen;
1604     rowlen = ai[i+1] - ai[i];
1605     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1606       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1607       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1608     }
1609     rowlen = bi[i+1] - bi[i];
1610     for (j0=j=0; j<rowlen; j0=j) {
1611       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1612       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1613     }
1614   }
1615   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
1616   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
1617   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1618   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1619   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
1620   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
1621   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
1622   PetscCall(PetscFree3(work,rdest,cdest));
1623   PetscCall(PetscFree(gcdest));
1624   if (parcolp) PetscCall(ISDestroy(&colp));
1625   *B = Aperm;
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1630 {
1631   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1635   if (ghosts) *ghosts = aij->garray;
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1640 {
1641   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1642   Mat            A    = mat->A,B = mat->B;
1643   PetscLogDouble isend[5],irecv[5];
1644 
1645   PetscFunctionBegin;
1646   info->block_size = 1.0;
1647   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1648 
1649   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1650   isend[3] = info->memory;  isend[4] = info->mallocs;
1651 
1652   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1653 
1654   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1655   isend[3] += info->memory;  isend[4] += info->mallocs;
1656   if (flag == MAT_LOCAL) {
1657     info->nz_used      = isend[0];
1658     info->nz_allocated = isend[1];
1659     info->nz_unneeded  = isend[2];
1660     info->memory       = isend[3];
1661     info->mallocs      = isend[4];
1662   } else if (flag == MAT_GLOBAL_MAX) {
1663     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1664 
1665     info->nz_used      = irecv[0];
1666     info->nz_allocated = irecv[1];
1667     info->nz_unneeded  = irecv[2];
1668     info->memory       = irecv[3];
1669     info->mallocs      = irecv[4];
1670   } else if (flag == MAT_GLOBAL_SUM) {
1671     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1672 
1673     info->nz_used      = irecv[0];
1674     info->nz_allocated = irecv[1];
1675     info->nz_unneeded  = irecv[2];
1676     info->memory       = irecv[3];
1677     info->mallocs      = irecv[4];
1678   }
1679   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1680   info->fill_ratio_needed = 0;
1681   info->factor_mallocs    = 0;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1686 {
1687   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1688 
1689   PetscFunctionBegin;
1690   switch (op) {
1691   case MAT_NEW_NONZERO_LOCATIONS:
1692   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1693   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1694   case MAT_KEEP_NONZERO_PATTERN:
1695   case MAT_NEW_NONZERO_LOCATION_ERR:
1696   case MAT_USE_INODES:
1697   case MAT_IGNORE_ZERO_ENTRIES:
1698   case MAT_FORM_EXPLICIT_TRANSPOSE:
1699     MatCheckPreallocated(A,1);
1700     PetscCall(MatSetOption(a->A,op,flg));
1701     PetscCall(MatSetOption(a->B,op,flg));
1702     break;
1703   case MAT_ROW_ORIENTED:
1704     MatCheckPreallocated(A,1);
1705     a->roworiented = flg;
1706 
1707     PetscCall(MatSetOption(a->A,op,flg));
1708     PetscCall(MatSetOption(a->B,op,flg));
1709     break;
1710   case MAT_FORCE_DIAGONAL_ENTRIES:
1711   case MAT_SORTED_FULL:
1712     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1713     break;
1714   case MAT_IGNORE_OFF_PROC_ENTRIES:
1715     a->donotstash = flg;
1716     break;
1717   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1718   case MAT_SPD:
1719   case MAT_SYMMETRIC:
1720   case MAT_STRUCTURALLY_SYMMETRIC:
1721   case MAT_HERMITIAN:
1722   case MAT_SYMMETRY_ETERNAL:
1723     break;
1724   case MAT_SUBMAT_SINGLEIS:
1725     A->submat_singleis = flg;
1726     break;
1727   case MAT_STRUCTURE_ONLY:
1728     /* The option is handled directly by MatSetOption() */
1729     break;
1730   default:
1731     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1732   }
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1737 {
1738   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1739   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1740   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1741   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1742   PetscInt       *cmap,*idx_p;
1743 
1744   PetscFunctionBegin;
1745   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1746   mat->getrowactive = PETSC_TRUE;
1747 
1748   if (!mat->rowvalues && (idx || v)) {
1749     /*
1750         allocate enough space to hold information from the longest row.
1751     */
1752     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1753     PetscInt   max = 1,tmp;
1754     for (i=0; i<matin->rmap->n; i++) {
1755       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1756       if (max < tmp) max = tmp;
1757     }
1758     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
1759   }
1760 
1761   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1762   lrow = row - rstart;
1763 
1764   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1765   if (!v)   {pvA = NULL; pvB = NULL;}
1766   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1767   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1768   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1769   nztot = nzA + nzB;
1770 
1771   cmap = mat->garray;
1772   if (v  || idx) {
1773     if (nztot) {
1774       /* Sort by increasing column numbers, assuming A and B already sorted */
1775       PetscInt imark = -1;
1776       if (v) {
1777         *v = v_p = mat->rowvalues;
1778         for (i=0; i<nzB; i++) {
1779           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1780           else break;
1781         }
1782         imark = i;
1783         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1784         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1785       }
1786       if (idx) {
1787         *idx = idx_p = mat->rowindices;
1788         if (imark > -1) {
1789           for (i=0; i<imark; i++) {
1790             idx_p[i] = cmap[cworkB[i]];
1791           }
1792         } else {
1793           for (i=0; i<nzB; i++) {
1794             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1795             else break;
1796           }
1797           imark = i;
1798         }
1799         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1800         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1801       }
1802     } else {
1803       if (idx) *idx = NULL;
1804       if (v)   *v   = NULL;
1805     }
1806   }
1807   *nz  = nztot;
1808   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1809   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1814 {
1815   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1816 
1817   PetscFunctionBegin;
1818   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1819   aij->getrowactive = PETSC_FALSE;
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1824 {
1825   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1826   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1827   PetscInt        i,j,cstart = mat->cmap->rstart;
1828   PetscReal       sum = 0.0;
1829   const MatScalar *v,*amata,*bmata;
1830 
1831   PetscFunctionBegin;
1832   if (aij->size == 1) {
1833     PetscCall(MatNorm(aij->A,type,norm));
1834   } else {
1835     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
1836     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
1837     if (type == NORM_FROBENIUS) {
1838       v = amata;
1839       for (i=0; i<amat->nz; i++) {
1840         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1841       }
1842       v = bmata;
1843       for (i=0; i<bmat->nz; i++) {
1844         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1845       }
1846       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1847       *norm = PetscSqrtReal(*norm);
1848       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
1849     } else if (type == NORM_1) { /* max column norm */
1850       PetscReal *tmp,*tmp2;
1851       PetscInt  *jj,*garray = aij->garray;
1852       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
1853       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
1854       *norm = 0.0;
1855       v     = amata; jj = amat->j;
1856       for (j=0; j<amat->nz; j++) {
1857         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1858       }
1859       v = bmata; jj = bmat->j;
1860       for (j=0; j<bmat->nz; j++) {
1861         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1862       }
1863       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1864       for (j=0; j<mat->cmap->N; j++) {
1865         if (tmp2[j] > *norm) *norm = tmp2[j];
1866       }
1867       PetscCall(PetscFree(tmp));
1868       PetscCall(PetscFree(tmp2));
1869       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1870     } else if (type == NORM_INFINITY) { /* max row norm */
1871       PetscReal ntemp = 0.0;
1872       for (j=0; j<aij->A->rmap->n; j++) {
1873         v   = amata + amat->i[j];
1874         sum = 0.0;
1875         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1876           sum += PetscAbsScalar(*v); v++;
1877         }
1878         v = bmata + bmat->i[j];
1879         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1880           sum += PetscAbsScalar(*v); v++;
1881         }
1882         if (sum > ntemp) ntemp = sum;
1883       }
1884       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
1885       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1886     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1887     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
1888     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
1889   }
1890   PetscFunctionReturn(0);
1891 }
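
/*
   Written out, the norms assembled above are the standard ones, with each process holding the
   nonzeros of its diagonal block A and off-diagonal block B of the global matrix M:

     ||M||_F   = sqrt( sum_ij |m_ij|^2 )    -- squared moduli summed locally, then reduced with MPIU_SUM
     ||M||_1   = max_j sum_i |m_ij|         -- column sums accumulated in tmp[], reduced with MPIU_SUM
     ||M||_inf = max_i sum_j |m_ij|         -- row sums formed locally, reduced with MPIU_MAX
*/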
1892 
1893 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1894 {
1895   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1896   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1897   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1898   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1899   Mat             B,A_diag,*B_diag;
1900   const MatScalar *pbv,*bv;
1901 
1902   PetscFunctionBegin;
1903   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1904   ai = Aloc->i; aj = Aloc->j;
1905   bi = Bloc->i; bj = Bloc->j;
1906   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1907     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1908     PetscSFNode          *oloc;
1909     PETSC_UNUSED PetscSF sf;
1910 
1911     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
1912     /* compute d_nnz for preallocation */
1913     PetscCall(PetscArrayzero(d_nnz,na));
1914     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1915     /* compute local off-diagonal contributions */
1916     PetscCall(PetscArrayzero(g_nnz,nb));
1917     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1918     /* map those to global */
1919     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1920     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
1921     PetscCall(PetscSFSetFromOptions(sf));
1922     PetscCall(PetscArrayzero(o_nnz,na));
1923     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1924     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1925     PetscCall(PetscSFDestroy(&sf));
1926 
1927     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1928     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1929     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1930     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1931     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
1932     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1933   } else {
1934     B    = *matout;
1935     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1936   }
1937 
1938   b           = (Mat_MPIAIJ*)B->data;
1939   A_diag      = a->A;
1940   B_diag      = &b->A;
1941   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1942   A_diag_ncol = A_diag->cmap->N;
1943   B_diag_ilen = sub_B_diag->ilen;
1944   B_diag_i    = sub_B_diag->i;
1945 
1946   /* Set ilen for diagonal of B */
1947   for (i=0; i<A_diag_ncol; i++) {
1948     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1949   }
1950 
1951   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1952   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1953   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1954 
1955   /* copy over the B part */
1956   PetscCall(PetscMalloc1(bi[mb],&cols));
1957   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1958   pbv  = bv;
1959   row  = A->rmap->rstart;
1960   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1961   cols_tmp = cols;
1962   for (i=0; i<mb; i++) {
1963     ncol = bi[i+1]-bi[i];
1964     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
1965     row++;
1966     pbv += ncol; cols_tmp += ncol;
1967   }
1968   PetscCall(PetscFree(cols));
1969   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1970 
1971   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1972   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1973   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1974     *matout = B;
1975   } else {
1976     PetscCall(MatHeaderMerge(A,&B));
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1982 {
1983   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1984   Mat            a    = aij->A,b = aij->B;
1985   PetscInt       s1,s2,s3;
1986 
1987   PetscFunctionBegin;
1988   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1989   if (rr) {
1990     PetscCall(VecGetLocalSize(rr,&s1));
1991     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1992     /* Overlap communication with computation. */
1993     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1994   }
1995   if (ll) {
1996     PetscCall(VecGetLocalSize(ll,&s1));
1997     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1998     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1999   }
2000   /* scale  the diagonal block */
2001   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2002 
2003   if (rr) {
2004     /* Do a scatter end and then right scale the off-diagonal block */
2005     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2006     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2007   }
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2012 {
2013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2014 
2015   PetscFunctionBegin;
2016   PetscCall(MatSetUnfactored(a->A));
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2021 {
2022   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2023   Mat            a,b,c,d;
2024   PetscBool      flg;
2025 
2026   PetscFunctionBegin;
2027   a = matA->A; b = matA->B;
2028   c = matB->A; d = matB->B;
2029 
2030   PetscCall(MatEqual(a,c,&flg));
2031   if (flg) {
2032     PetscCall(MatEqual(b,d,&flg));
2033   }
2034   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2039 {
2040   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2041   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2042 
2043   PetscFunctionBegin;
2044   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2045   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2046     /* Because of the column compression in the off-process part of the matrix a->B,
2047        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2048        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2049        could be provided by first uncompressing the a->B matrices and then copying the
2050        submatrices */
2051     PetscCall(MatCopy_Basic(A,B,str));
2052   } else {
2053     PetscCall(MatCopy(a->A,b->A,str));
2054     PetscCall(MatCopy(a->B,b->B,str));
2055   }
2056   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2061 {
2062   PetscFunctionBegin;
2063   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 /*
2068    Computes the number of nonzeros per row needed for preallocation when X and Y
2069    have different nonzero structure.
2070 */
2071 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2072 {
2073   PetscInt       i,j,k,nzx,nzy;
2074 
2075   PetscFunctionBegin;
2076   /* Set the number of nonzeros in the new matrix */
2077   for (i=0; i<m; i++) {
2078     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2079     nzx = xi[i+1] - xi[i];
2080     nzy = yi[i+1] - yi[i];
2081     nnz[i] = 0;
2082     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2083       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2084       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2085       nnz[i]++;
2086     }
2087     for (; k<nzy; k++) nnz[i]++;
2088   }
2089   PetscFunctionReturn(0);
2090 }
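
/*
   For illustration (hypothetical data): if row i of X has global columns xltog[xjj] = {0,3,7} and
   row i of Y has yltog[yjj] = {3,5}, the merge above counts the union {0,3,5,7}, giving nnz[i] = 4;
   the shared column 3 is counted only once because the duplicate is skipped.
*/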
2091 
2092 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2093 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2094 {
2095   PetscInt       m = Y->rmap->N;
2096   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2097   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2098 
2099   PetscFunctionBegin;
2100   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2105 {
2106   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2107 
2108   PetscFunctionBegin;
2109   if (str == SAME_NONZERO_PATTERN) {
2110     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2111     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2112   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2113     PetscCall(MatAXPY_Basic(Y,a,X,str));
2114   } else {
2115     Mat      B;
2116     PetscInt *nnz_d,*nnz_o;
2117 
2118     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2119     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2120     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2121     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2122     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2123     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2124     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2125     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2126     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2127     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2128     PetscCall(MatHeaderMerge(Y,&B));
2129     PetscCall(PetscFree(nnz_d));
2130     PetscCall(PetscFree(nnz_o));
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2136 
2137 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2138 {
2139   PetscFunctionBegin;
2140   if (PetscDefined(USE_COMPLEX)) {
2141     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2142 
2143     PetscCall(MatConjugate_SeqAIJ(aij->A));
2144     PetscCall(MatConjugate_SeqAIJ(aij->B));
2145   }
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2150 {
2151   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2152 
2153   PetscFunctionBegin;
2154   PetscCall(MatRealPart(a->A));
2155   PetscCall(MatRealPart(a->B));
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2160 {
2161   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2162 
2163   PetscFunctionBegin;
2164   PetscCall(MatImaginaryPart(a->A));
2165   PetscCall(MatImaginaryPart(a->B));
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2170 {
2171   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2172   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2173   PetscScalar       *va,*vv;
2174   Vec               vB,vA;
2175   const PetscScalar *vb;
2176 
2177   PetscFunctionBegin;
2178   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2179   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2180 
2181   PetscCall(VecGetArrayWrite(vA,&va));
2182   if (idx) {
2183     for (i=0; i<m; i++) {
2184       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2185     }
2186   }
2187 
2188   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2189   PetscCall(PetscMalloc1(m,&idxb));
2190   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2191 
2192   PetscCall(VecGetArrayWrite(v,&vv));
2193   PetscCall(VecGetArrayRead(vB,&vb));
2194   for (i=0; i<m; i++) {
2195     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2196       vv[i] = vb[i];
2197       if (idx) idx[i] = a->garray[idxb[i]];
2198     } else {
2199       vv[i] = va[i];
2200       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2201         idx[i] = a->garray[idxb[i]];
2202     }
2203   }
2204   PetscCall(VecRestoreArrayWrite(v,&vv));
2205   PetscCall(VecRestoreArrayWrite(vA,&va));
2206   PetscCall(VecRestoreArrayRead(vB,&vb));
2207   PetscCall(PetscFree(idxb));
2208   PetscCall(VecDestroy(&vA));
2209   PetscCall(VecDestroy(&vB));
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2214 {
2215   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2216   PetscInt          m = A->rmap->n,n = A->cmap->n;
2217   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2218   PetscInt          *cmap  = mat->garray;
2219   PetscInt          *diagIdx, *offdiagIdx;
2220   Vec               diagV, offdiagV;
2221   PetscScalar       *a, *diagA, *offdiagA;
2222   const PetscScalar *ba,*bav;
2223   PetscInt          r,j,col,ncols,*bi,*bj;
2224   Mat               B = mat->B;
2225   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2226 
2227   PetscFunctionBegin;
2228   /* When one process holds the entire A and the other processes have no entries */
2229   if (A->cmap->N == n) {
2230     PetscCall(VecGetArrayWrite(v,&diagA));
2231     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2232     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
2233     PetscCall(VecDestroy(&diagV));
2234     PetscCall(VecRestoreArrayWrite(v,&diagA));
2235     PetscFunctionReturn(0);
2236   } else if (n == 0) {
2237     if (m) {
2238       PetscCall(VecGetArrayWrite(v,&a));
2239       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2240       PetscCall(VecRestoreArrayWrite(v,&a));
2241     }
2242     PetscFunctionReturn(0);
2243   }
2244 
2245   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2246   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2248   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2249 
2250   /* Get offdiagIdx[] for implicit 0.0 */
2251   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2252   ba   = bav;
2253   bi   = b->i;
2254   bj   = b->j;
2255   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2256   for (r = 0; r < m; r++) {
2257     ncols = bi[r+1] - bi[r];
2258     if (ncols == A->cmap->N - n) { /* Brow is dense */
2259       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2260     } else { /* Brow is sparse, so the implicit zeros mean the minimum magnitude is already known to be 0.0 */
2261       offdiagA[r] = 0.0;
2262 
2263       /* Find first hole in the cmap */
2264       for (j=0; j<ncols; j++) {
2265         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2266         if (col > j && j < cstart) {
2267           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2268           break;
2269         } else if (col > j + n && j >= cstart) {
2270           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2271           break;
2272         }
2273       }
2274       if (j == ncols && ncols < A->cmap->N - n) {
2275         /* a hole is outside compressed Bcols */
2276         if (ncols == 0) {
2277           if (cstart) {
2278             offdiagIdx[r] = 0;
2279           } else offdiagIdx[r] = cend;
2280         } else { /* ncols > 0 */
2281           offdiagIdx[r] = cmap[ncols-1] + 1;
2282           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2283         }
2284       }
2285     }
2286 
2287     for (j=0; j<ncols; j++) {
2288       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2289       ba++; bj++;
2290     }
2291   }
2292 
2293   PetscCall(VecGetArrayWrite(v, &a));
2294   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2295   for (r = 0; r < m; ++r) {
2296     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2297       a[r]   = diagA[r];
2298       if (idx) idx[r] = cstart + diagIdx[r];
2299     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2300       a[r] = diagA[r];
2301       if (idx) {
2302         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2303           idx[r] = cstart + diagIdx[r];
2304         } else idx[r] = offdiagIdx[r];
2305       }
2306     } else {
2307       a[r]   = offdiagA[r];
2308       if (idx) idx[r] = offdiagIdx[r];
2309     }
2310   }
2311   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2312   PetscCall(VecRestoreArrayWrite(v, &a));
2313   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2314   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2315   PetscCall(VecDestroy(&diagV));
2316   PetscCall(VecDestroy(&offdiagV));
2317   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2322 {
2323   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2324   PetscInt          m = A->rmap->n,n = A->cmap->n;
2325   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2326   PetscInt          *cmap  = mat->garray;
2327   PetscInt          *diagIdx, *offdiagIdx;
2328   Vec               diagV, offdiagV;
2329   PetscScalar       *a, *diagA, *offdiagA;
2330   const PetscScalar *ba,*bav;
2331   PetscInt          r,j,col,ncols,*bi,*bj;
2332   Mat               B = mat->B;
2333   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2334 
2335   PetscFunctionBegin;
2336   /* When one process holds the entire A and the other processes have no entries */
2337   if (A->cmap->N == n) {
2338     PetscCall(VecGetArrayWrite(v,&diagA));
2339     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2340     PetscCall(MatGetRowMin(mat->A,diagV,idx));
2341     PetscCall(VecDestroy(&diagV));
2342     PetscCall(VecRestoreArrayWrite(v,&diagA));
2343     PetscFunctionReturn(0);
2344   } else if (n == 0) {
2345     if (m) {
2346       PetscCall(VecGetArrayWrite(v,&a));
2347       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2348       PetscCall(VecRestoreArrayWrite(v,&a));
2349     }
2350     PetscFunctionReturn(0);
2351   }
2352 
2353   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
2354   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2355   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2356   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2357 
2358   /* Get offdiagIdx[] for implicit 0.0 */
2359   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2360   ba   = bav;
2361   bi   = b->i;
2362   bj   = b->j;
2363   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2364   for (r = 0; r < m; r++) {
2365     ncols = bi[r+1] - bi[r];
2366     if (ncols == A->cmap->N - n) { /* Brow is dense */
2367       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2368     } else { /* Brow is sparse, so the implicit zeros mean the minimum is already known to be 0.0 or lower */
2369       offdiagA[r] = 0.0;
2370 
2371       /* Find first hole in the cmap */
2372       for (j=0; j<ncols; j++) {
2373         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2374         if (col > j && j < cstart) {
2375           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2376           break;
2377         } else if (col > j + n && j >= cstart) {
2378           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2379           break;
2380         }
2381       }
2382       if (j == ncols && ncols < A->cmap->N - n) {
2383         /* a hole is outside compressed Bcols */
2384         if (ncols == 0) {
2385           if (cstart) {
2386             offdiagIdx[r] = 0;
2387           } else offdiagIdx[r] = cend;
2388         } else { /* ncols > 0 */
2389           offdiagIdx[r] = cmap[ncols-1] + 1;
2390           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2391         }
2392       }
2393     }
2394 
2395     for (j=0; j<ncols; j++) {
2396       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2397       ba++; bj++;
2398     }
2399   }
2400 
2401   PetscCall(VecGetArrayWrite(v, &a));
2402   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2403   for (r = 0; r < m; ++r) {
2404     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2405       a[r]   = diagA[r];
2406       if (idx) idx[r] = cstart + diagIdx[r];
2407     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2408       a[r] = diagA[r];
2409       if (idx) {
2410         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2411           idx[r] = cstart + diagIdx[r];
2412         } else idx[r] = offdiagIdx[r];
2413       }
2414     } else {
2415       a[r]   = offdiagA[r];
2416       if (idx) idx[r] = offdiagIdx[r];
2417     }
2418   }
2419   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2420   PetscCall(VecRestoreArrayWrite(v, &a));
2421   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2422   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2423   PetscCall(VecDestroy(&diagV));
2424   PetscCall(VecDestroy(&offdiagV));
2425   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2430 {
2431   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2432   PetscInt          m = A->rmap->n,n = A->cmap->n;
2433   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2434   PetscInt          *cmap  = mat->garray;
2435   PetscInt          *diagIdx, *offdiagIdx;
2436   Vec               diagV, offdiagV;
2437   PetscScalar       *a, *diagA, *offdiagA;
2438   const PetscScalar *ba,*bav;
2439   PetscInt          r,j,col,ncols,*bi,*bj;
2440   Mat               B = mat->B;
2441   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2442 
2443   PetscFunctionBegin;
2444   /* When one process holds the entire A and the other processes have no entries */
2445   if (A->cmap->N == n) {
2446     PetscCall(VecGetArrayWrite(v,&diagA));
2447     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2448     PetscCall(MatGetRowMax(mat->A,diagV,idx));
2449     PetscCall(VecDestroy(&diagV));
2450     PetscCall(VecRestoreArrayWrite(v,&diagA));
2451     PetscFunctionReturn(0);
2452   } else if (n == 0) {
2453     if (m) {
2454       PetscCall(VecGetArrayWrite(v,&a));
2455       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2456       PetscCall(VecRestoreArrayWrite(v,&a));
2457     }
2458     PetscFunctionReturn(0);
2459   }
2460 
2461   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2462   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2463   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2464   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2465 
2466   /* Get offdiagIdx[] for implicit 0.0 */
2467   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2468   ba   = bav;
2469   bi   = b->i;
2470   bj   = b->j;
2471   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2472   for (r = 0; r < m; r++) {
2473     ncols = bi[r+1] - bi[r];
2474     if (ncols == A->cmap->N - n) { /* Brow is dense */
2475       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2476     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2477       offdiagA[r] = 0.0;
2478 
2479       /* Find first hole in the cmap */
2480       for (j=0; j<ncols; j++) {
2481         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2482         if (col > j && j < cstart) {
2483           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2484           break;
2485         } else if (col > j + n && j >= cstart) {
2486           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2487           break;
2488         }
2489       }
2490       if (j == ncols && ncols < A->cmap->N - n) {
2491         /* a hole is outside compressed Bcols */
2492         if (ncols == 0) {
2493           if (cstart) {
2494             offdiagIdx[r] = 0;
2495           } else offdiagIdx[r] = cend;
2496         } else { /* ncols > 0 */
2497           offdiagIdx[r] = cmap[ncols-1] + 1;
2498           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2499         }
2500       }
2501     }
2502 
2503     for (j=0; j<ncols; j++) {
2504       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2505       ba++; bj++;
2506     }
2507   }
2508 
2509   PetscCall(VecGetArrayWrite(v,    &a));
2510   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
2511   for (r = 0; r < m; ++r) {
2512     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2513       a[r] = diagA[r];
2514       if (idx) idx[r] = cstart + diagIdx[r];
2515     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2516       a[r] = diagA[r];
2517       if (idx) {
2518         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2519           idx[r] = cstart + diagIdx[r];
2520         } else idx[r] = offdiagIdx[r];
2521       }
2522     } else {
2523       a[r] = offdiagA[r];
2524       if (idx) idx[r] = offdiagIdx[r];
2525     }
2526   }
2527   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2528   PetscCall(VecRestoreArrayWrite(v,       &a));
2529   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
2530   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
2531   PetscCall(VecDestroy(&diagV));
2532   PetscCall(VecDestroy(&offdiagV));
2533   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2538 {
2539   Mat            *dummy;
2540 
2541   PetscFunctionBegin;
2542   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2543   *newmat = *dummy;
2544   PetscCall(PetscFree(dummy));
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2549 {
2550   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2551 
2552   PetscFunctionBegin;
2553   PetscCall(MatInvertBlockDiagonal(a->A,values));
2554   A->factorerrortype = a->A->factorerrortype;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2559 {
2560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2561 
2562   PetscFunctionBegin;
2563   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2564   PetscCall(MatSetRandom(aij->A,rctx));
2565   if (x->assembled) {
2566     PetscCall(MatSetRandom(aij->B,rctx));
2567   } else {
2568     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2569   }
2570   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2571   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2576 {
2577   PetscFunctionBegin;
2578   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2579   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 /*@
2584    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2585 
2586    Collective on Mat
2587 
2588    Input Parameters:
2589 +    A - the matrix
2590 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2591 
2592    Level: advanced
2593 
2594 @*/
2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2596 {
2597   PetscFunctionBegin;
2598   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2599   PetscFunctionReturn(0);
2600 }
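
/*
   A minimal usage sketch, assuming A is a MATMPIAIJ matrix created by the caller:

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE));

   The same switch is exposed in the options database as -mat_increase_overlap_scalable
   (see MatSetFromOptions_MPIAIJ() below).
*/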
2601 
2602 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2603 {
2604   PetscBool            sc = PETSC_FALSE,flg;
2605 
2606   PetscFunctionBegin;
2607   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2608   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2609   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2610   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2611   PetscOptionsHeadEnd();
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2616 {
2617   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2618   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2619 
2620   PetscFunctionBegin;
2621   if (!Y->preallocated) {
2622     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2623   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2624     PetscInt nonew = aij->nonew;
2625     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2626     aij->nonew = nonew;
2627   }
2628   PetscCall(MatShift_Basic(Y,a));
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2633 {
2634   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2635 
2636   PetscFunctionBegin;
2637   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2638   PetscCall(MatMissingDiagonal(a->A,missing,d));
2639   if (d) {
2640     PetscInt rstart;
2641     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2642     *d += rstart;
2643 
2644   }
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2649 {
2650   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2651 
2652   PetscFunctionBegin;
2653   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 /* -------------------------------------------------------------------*/
2658 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2659                                        MatGetRow_MPIAIJ,
2660                                        MatRestoreRow_MPIAIJ,
2661                                        MatMult_MPIAIJ,
2662                                 /* 4*/ MatMultAdd_MPIAIJ,
2663                                        MatMultTranspose_MPIAIJ,
2664                                        MatMultTransposeAdd_MPIAIJ,
2665                                        NULL,
2666                                        NULL,
2667                                        NULL,
2668                                 /*10*/ NULL,
2669                                        NULL,
2670                                        NULL,
2671                                        MatSOR_MPIAIJ,
2672                                        MatTranspose_MPIAIJ,
2673                                 /*15*/ MatGetInfo_MPIAIJ,
2674                                        MatEqual_MPIAIJ,
2675                                        MatGetDiagonal_MPIAIJ,
2676                                        MatDiagonalScale_MPIAIJ,
2677                                        MatNorm_MPIAIJ,
2678                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2679                                        MatAssemblyEnd_MPIAIJ,
2680                                        MatSetOption_MPIAIJ,
2681                                        MatZeroEntries_MPIAIJ,
2682                                 /*24*/ MatZeroRows_MPIAIJ,
2683                                        NULL,
2684                                        NULL,
2685                                        NULL,
2686                                        NULL,
2687                                 /*29*/ MatSetUp_MPIAIJ,
2688                                        NULL,
2689                                        NULL,
2690                                        MatGetDiagonalBlock_MPIAIJ,
2691                                        NULL,
2692                                 /*34*/ MatDuplicate_MPIAIJ,
2693                                        NULL,
2694                                        NULL,
2695                                        NULL,
2696                                        NULL,
2697                                 /*39*/ MatAXPY_MPIAIJ,
2698                                        MatCreateSubMatrices_MPIAIJ,
2699                                        MatIncreaseOverlap_MPIAIJ,
2700                                        MatGetValues_MPIAIJ,
2701                                        MatCopy_MPIAIJ,
2702                                 /*44*/ MatGetRowMax_MPIAIJ,
2703                                        MatScale_MPIAIJ,
2704                                        MatShift_MPIAIJ,
2705                                        MatDiagonalSet_MPIAIJ,
2706                                        MatZeroRowsColumns_MPIAIJ,
2707                                 /*49*/ MatSetRandom_MPIAIJ,
2708                                        MatGetRowIJ_MPIAIJ,
2709                                        MatRestoreRowIJ_MPIAIJ,
2710                                        NULL,
2711                                        NULL,
2712                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2713                                        NULL,
2714                                        MatSetUnfactored_MPIAIJ,
2715                                        MatPermute_MPIAIJ,
2716                                        NULL,
2717                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2718                                        MatDestroy_MPIAIJ,
2719                                        MatView_MPIAIJ,
2720                                        NULL,
2721                                        NULL,
2722                                 /*64*/ NULL,
2723                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2728                                        MatGetRowMinAbs_MPIAIJ,
2729                                        NULL,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                 /*75*/ MatFDColoringApply_AIJ,
2734                                        MatSetFromOptions_MPIAIJ,
2735                                        NULL,
2736                                        NULL,
2737                                        MatFindZeroDiagonals_MPIAIJ,
2738                                 /*80*/ NULL,
2739                                        NULL,
2740                                        NULL,
2741                                 /*83*/ MatLoad_MPIAIJ,
2742                                        MatIsSymmetric_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                 /*89*/ NULL,
2748                                        NULL,
2749                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2750                                        NULL,
2751                                        NULL,
2752                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        MatBindToCPU_MPIAIJ,
2757                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        MatConjugate_MPIAIJ,
2761                                        NULL,
2762                                 /*104*/MatSetValuesRow_MPIAIJ,
2763                                        MatRealPart_MPIAIJ,
2764                                        MatImaginaryPart_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                 /*109*/NULL,
2768                                        NULL,
2769                                        MatGetRowMin_MPIAIJ,
2770                                        NULL,
2771                                        MatMissingDiagonal_MPIAIJ,
2772                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2773                                        NULL,
2774                                        MatGetGhosts_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2778                                        NULL,
2779                                        NULL,
2780                                        NULL,
2781                                        MatGetMultiProcBlock_MPIAIJ,
2782                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2783                                        MatGetColumnReductions_MPIAIJ,
2784                                        MatInvertBlockDiagonal_MPIAIJ,
2785                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2786                                        MatCreateSubMatricesMPI_MPIAIJ,
2787                                 /*129*/NULL,
2788                                        NULL,
2789                                        NULL,
2790                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2791                                        NULL,
2792                                 /*134*/NULL,
2793                                        NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                 /*139*/MatSetBlockSizes_MPIAIJ,
2798                                        NULL,
2799                                        NULL,
2800                                        MatFDColoringSetUp_MPIXAIJ,
2801                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2802                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2803                                 /*145*/NULL,
2804                                        NULL,
2805                                        NULL
2806 };
2807 
2808 /* ----------------------------------------------------------------------------------------*/
2809 
2810 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2811 {
2812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2813 
2814   PetscFunctionBegin;
2815   PetscCall(MatStoreValues(aij->A));
2816   PetscCall(MatStoreValues(aij->B));
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2821 {
2822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2823 
2824   PetscFunctionBegin;
2825   PetscCall(MatRetrieveValues(aij->A));
2826   PetscCall(MatRetrieveValues(aij->B));
2827   PetscFunctionReturn(0);
2828 }
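
/*
   Illustrative usage sketch (not part of the original source): the store/retrieve cycle these
   routines support, useful when a matrix with a fixed nonzero pattern is repeatedly refilled.
   A is assumed to be an already assembled MATMPIAIJ; the helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE)); /* the pattern must stay fixed */
  PetscCall(MatStoreValues(A));    /* stash the current values of the diagonal and off-diagonal parts */
  /* ... later, typically once per outer iteration ... */
  PetscCall(MatRetrieveValues(A)); /* restore the stashed values before adding new contributions */
  PetscFunctionReturn(0);
}
#endif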
2829 
2830 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2831 {
2832   Mat_MPIAIJ     *b;
2833   PetscMPIInt    size;
2834 
2835   PetscFunctionBegin;
2836   PetscCall(PetscLayoutSetUp(B->rmap));
2837   PetscCall(PetscLayoutSetUp(B->cmap));
2838   b = (Mat_MPIAIJ*)B->data;
2839 
2840 #if defined(PETSC_USE_CTABLE)
2841   PetscCall(PetscTableDestroy(&b->colmap));
2842 #else
2843   PetscCall(PetscFree(b->colmap));
2844 #endif
2845   PetscCall(PetscFree(b->garray));
2846   PetscCall(VecDestroy(&b->lvec));
2847   PetscCall(VecScatterDestroy(&b->Mvctx));
2848 
2849   /* Because B will have been resized, we simply destroy it and create a new one each time */
2850   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2851   PetscCall(MatDestroy(&b->B));
2852   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2853   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2854   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
2855   PetscCall(MatSetType(b->B,MATSEQAIJ));
2856   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2857 
2858   if (!B->preallocated) {
2859     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2860     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2861     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
2862     PetscCall(MatSetType(b->A,MATSEQAIJ));
2863     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2864   }
2865 
2866   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
2867   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2868   B->preallocated  = PETSC_TRUE;
2869   B->was_assembled = PETSC_FALSE;
2870   B->assembled     = PETSC_FALSE;
2871   PetscFunctionReturn(0);
2872 }
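
/*
   Illustrative usage sketch (not part of the original source): typical preallocation of a
   MATMPIAIJ with constant per-row estimates for the diagonal and off-diagonal blocks, which is
   what the routine above implements. The helper name and the estimates are hypothetical.
*/
#if 0
static PetscErrorCode ExamplePreallocateMPIAIJ(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*A,MATMPIAIJ));
  /* at most 5 nonzeros per row in the diagonal block and 2 in the off-diagonal block */
  PetscCall(MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL));
  PetscFunctionReturn(0);
}
#endif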
2873 
2874 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2875 {
2876   Mat_MPIAIJ     *b;
2877 
2878   PetscFunctionBegin;
2879   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2880   PetscCall(PetscLayoutSetUp(B->rmap));
2881   PetscCall(PetscLayoutSetUp(B->cmap));
2882   b = (Mat_MPIAIJ*)B->data;
2883 
2884 #if defined(PETSC_USE_CTABLE)
2885   PetscCall(PetscTableDestroy(&b->colmap));
2886 #else
2887   PetscCall(PetscFree(b->colmap));
2888 #endif
2889   PetscCall(PetscFree(b->garray));
2890   PetscCall(VecDestroy(&b->lvec));
2891   PetscCall(VecScatterDestroy(&b->Mvctx));
2892 
2893   PetscCall(MatResetPreallocation(b->A));
2894   PetscCall(MatResetPreallocation(b->B));
2895   B->preallocated  = PETSC_TRUE;
2896   B->was_assembled = PETSC_FALSE;
2897   B->assembled = PETSC_FALSE;
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2902 {
2903   Mat            mat;
2904   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2905 
2906   PetscFunctionBegin;
2907   *newmat = NULL;
2908   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2909   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2910   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
2911   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2912   a       = (Mat_MPIAIJ*)mat->data;
2913 
2914   mat->factortype   = matin->factortype;
2915   mat->assembled    = matin->assembled;
2916   mat->insertmode   = NOT_SET_VALUES;
2917   mat->preallocated = matin->preallocated;
2918 
2919   a->size         = oldmat->size;
2920   a->rank         = oldmat->rank;
2921   a->donotstash   = oldmat->donotstash;
2922   a->roworiented  = oldmat->roworiented;
2923   a->rowindices   = NULL;
2924   a->rowvalues    = NULL;
2925   a->getrowactive = PETSC_FALSE;
2926 
2927   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2928   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2929 
2930   if (oldmat->colmap) {
2931 #if defined(PETSC_USE_CTABLE)
2932     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2933 #else
2934     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
2935     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
2936     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2937 #endif
2938   } else a->colmap = NULL;
2939   if (oldmat->garray) {
2940     PetscInt len;
2941     len  = oldmat->B->cmap->n;
2942     PetscCall(PetscMalloc1(len+1,&a->garray));
2943     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2944     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2945   } else a->garray = NULL;
2946 
2947   /* MatDuplicate() may be called with a non-assembled matrix, since it only
2948      requires the matrix to be preallocated.
2949      This can happen, for example, inside a DMCreateMatrix_Shell */
2950   if (oldmat->lvec) {
2951     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2952     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2953   }
2954   if (oldmat->Mvctx) {
2955     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2956     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2957   }
2958   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2959   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2960   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2961   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2962   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2963   *newmat = mat;
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2968 {
2969   PetscBool      isbinary, ishdf5;
2970 
2971   PetscFunctionBegin;
2972   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2973   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2974   /* force binary viewer to load .info file if it has not yet done so */
2975   PetscCall(PetscViewerSetUp(viewer));
2976   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2977   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2978   if (isbinary) {
2979     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2980   } else if (ishdf5) {
2981 #if defined(PETSC_HAVE_HDF5)
2982     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2983 #else
2984     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2985 #endif
2986   } else {
2987     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2988   }
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2993 {
2994   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2995   PetscInt       *rowidxs,*colidxs;
2996   PetscScalar    *matvals;
2997 
2998   PetscFunctionBegin;
2999   PetscCall(PetscViewerSetUp(viewer));
3000 
3001   /* read in matrix header */
3002   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3003   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3004   M  = header[1]; N = header[2]; nz = header[3];
3005   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3006   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3007   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3008 
3009   /* set block sizes from the viewer's .info file */
3010   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3011   /* set global sizes if not set already */
3012   if (mat->rmap->N < 0) mat->rmap->N = M;
3013   if (mat->cmap->N < 0) mat->cmap->N = N;
3014   PetscCall(PetscLayoutSetUp(mat->rmap));
3015   PetscCall(PetscLayoutSetUp(mat->cmap));
3016 
3017   /* check if the matrix sizes are correct */
3018   PetscCall(MatGetSize(mat,&rows,&cols));
3019   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3020 
3021   /* read in row lengths and build row indices */
3022   PetscCall(MatGetLocalSize(mat,&m,NULL));
3023   PetscCall(PetscMalloc1(m+1,&rowidxs));
3024   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3025   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3026   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3027   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3028   /* read in column indices and matrix values */
3029   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3030   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3031   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3032   /* store matrix indices and values */
3033   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3034   PetscCall(PetscFree(rowidxs));
3035   PetscCall(PetscFree2(colidxs,matvals));
3036   PetscFunctionReturn(0);
3037 }
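
/*
   Illustrative usage sketch (not part of the original source): loading a MATMPIAIJ from a PETSc
   binary file through the reader above. The file name "matrix.dat" and the helper name are
   hypothetical.
*/
#if 0
static PetscErrorCode ExampleLoadMPIAIJ(MPI_Comm comm,Mat *A)
{
  PetscViewer viewer;

  PetscFunctionBegin;
  PetscCall(PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer));
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetType(*A,MATMPIAIJ));
  PetscCall(MatLoad(*A,viewer)); /* reads the header, row lengths, column indices, and values */
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(0);
}
#endif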
3038 
3039 /* Not scalable because of ISAllGather() unless getting all columns. */
3040 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3041 {
3042   IS             iscol_local;
3043   PetscBool      isstride;
3044   PetscMPIInt    lisstride=0,gisstride;
3045 
3046   PetscFunctionBegin;
3047   /* check if we are grabbing all columns */
3048   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3049 
3050   if (isstride) {
3051     PetscInt  start,len,mstart,mlen;
3052     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3053     PetscCall(ISGetLocalSize(iscol,&len));
3054     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3055     if (mstart == start && mlen-mstart == len) lisstride = 1;
3056   }
3057 
3058   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3059   if (gisstride) {
3060     PetscInt N;
3061     PetscCall(MatGetSize(mat,NULL,&N));
3062     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3063     PetscCall(ISSetIdentity(iscol_local));
3064     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3065   } else {
3066     PetscInt cbs;
3067     PetscCall(ISGetBlockSize(iscol,&cbs));
3068     PetscCall(ISAllGather(iscol,&iscol_local));
3069     PetscCall(ISSetBlockSize(iscol_local,cbs));
3070   }
3071 
3072   *isseq = iscol_local;
3073   PetscFunctionReturn(0);
3074 }
3075 
3076 /*
3077  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3078  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3079 
3080  Input Parameters:
3081    mat - matrix
3082    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3083            i.e., mat->rstart <= isrow[i] < mat->rend
3084    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3085            i.e., mat->cstart <= iscol[i] < mat->cend
3086  Output Parameters:
3087    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3088    iscol_o - sequential column index set for retrieving mat->B
3089    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3090  */
3091 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3092 {
3093   Vec            x,cmap;
3094   const PetscInt *is_idx;
3095   PetscScalar    *xarray,*cmaparray;
3096   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3097   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3098   Mat            B=a->B;
3099   Vec            lvec=a->lvec,lcmap;
3100   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3101   MPI_Comm       comm;
3102   VecScatter     Mvctx=a->Mvctx;
3103 
3104   PetscFunctionBegin;
3105   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3106   PetscCall(ISGetLocalSize(iscol,&ncols));
3107 
3108   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3109   PetscCall(MatCreateVecs(mat,&x,NULL));
3110   PetscCall(VecSet(x,-1.0));
3111   PetscCall(VecDuplicate(x,&cmap));
3112   PetscCall(VecSet(cmap,-1.0));
3113 
3114   /* Get start indices */
3115   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3116   isstart -= ncols;
3117   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3118 
3119   PetscCall(ISGetIndices(iscol,&is_idx));
3120   PetscCall(VecGetArray(x,&xarray));
3121   PetscCall(VecGetArray(cmap,&cmaparray));
3122   PetscCall(PetscMalloc1(ncols,&idx));
3123   for (i=0; i<ncols; i++) {
3124     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3125     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3126     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3127   }
3128   PetscCall(VecRestoreArray(x,&xarray));
3129   PetscCall(VecRestoreArray(cmap,&cmaparray));
3130   PetscCall(ISRestoreIndices(iscol,&is_idx));
3131 
3132   /* Get iscol_d */
3133   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
3134   PetscCall(ISGetBlockSize(iscol,&i));
3135   PetscCall(ISSetBlockSize(*iscol_d,i));
3136 
3137   /* Get isrow_d */
3138   PetscCall(ISGetLocalSize(isrow,&m));
3139   rstart = mat->rmap->rstart;
3140   PetscCall(PetscMalloc1(m,&idx));
3141   PetscCall(ISGetIndices(isrow,&is_idx));
3142   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3143   PetscCall(ISRestoreIndices(isrow,&is_idx));
3144 
3145   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
3146   PetscCall(ISGetBlockSize(isrow,&i));
3147   PetscCall(ISSetBlockSize(*isrow_d,i));
3148 
3149   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3150   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3151   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3152 
3153   PetscCall(VecDuplicate(lvec,&lcmap));
3154 
3155   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3156   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3157 
3158   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3159   /* off-process column indices */
3160   count = 0;
3161   PetscCall(PetscMalloc1(Bn,&idx));
3162   PetscCall(PetscMalloc1(Bn,&cmap1));
3163 
3164   PetscCall(VecGetArray(lvec,&xarray));
3165   PetscCall(VecGetArray(lcmap,&cmaparray));
3166   for (i=0; i<Bn; i++) {
3167     if (PetscRealPart(xarray[i]) > -1.0) {
3168       idx[count]     = i;                   /* local column index in off-diagonal part B */
3169       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3170       count++;
3171     }
3172   }
3173   PetscCall(VecRestoreArray(lvec,&xarray));
3174   PetscCall(VecRestoreArray(lcmap,&cmaparray));
3175 
3176   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3177   /* cannot ensure iscol_o has same blocksize as iscol! */
3178 
3179   PetscCall(PetscFree(idx));
3180   *garray = cmap1;
3181 
3182   PetscCall(VecDestroy(&x));
3183   PetscCall(VecDestroy(&cmap));
3184   PetscCall(VecDestroy(&lcmap));
3185   PetscFunctionReturn(0);
3186 }
3187 
3188 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3189 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3190 {
3191   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3192   Mat            M = NULL;
3193   MPI_Comm       comm;
3194   IS             iscol_d,isrow_d,iscol_o;
3195   Mat            Asub = NULL,Bsub = NULL;
3196   PetscInt       n;
3197 
3198   PetscFunctionBegin;
3199   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3200 
3201   if (call == MAT_REUSE_MATRIX) {
3202     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3203     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
3204     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3205 
3206     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
3207     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3208 
3209     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
3210     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3211 
3212     /* Update diagonal and off-diagonal portions of submat */
3213     asub = (Mat_MPIAIJ*)(*submat)->data;
3214     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
3215     PetscCall(ISGetLocalSize(iscol_o,&n));
3216     if (n) {
3217       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
3218     }
3219     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
3220     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
3221 
3222   } else { /* call == MAT_INITIAL_MATRIX */
3223     const PetscInt *garray;
3224     PetscInt        BsubN;
3225 
3226     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3227     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
3228 
3229     /* Create local submatrices Asub and Bsub */
3230     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
3231     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
3232 
3233     /* Create submatrix M */
3234     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
3235 
3236     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3237     asub = (Mat_MPIAIJ*)M->data;
3238 
3239     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3240     n = asub->B->cmap->N;
3241     if (BsubN > n) {
3242       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3243       const PetscInt *idx;
3244       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3245       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
3246 
3247       PetscCall(PetscMalloc1(n,&idx_new));
3248       j = 0;
3249       PetscCall(ISGetIndices(iscol_o,&idx));
3250       for (i=0; i<n; i++) {
3251         if (j >= BsubN) break;
3252         while (subgarray[i] > garray[j]) j++;
3253 
3254         if (subgarray[i] == garray[j]) {
3255           idx_new[i] = idx[j++];
3256         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3257       }
3258       PetscCall(ISRestoreIndices(iscol_o,&idx));
3259 
3260       PetscCall(ISDestroy(&iscol_o));
3261       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
3262 
3263     } else if (BsubN < n) {
3264       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3265     }
3266 
3267     PetscCall(PetscFree(garray));
3268     *submat = M;
3269 
3270     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3271     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
3272     PetscCall(ISDestroy(&isrow_d));
3273 
3274     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
3275     PetscCall(ISDestroy(&iscol_d));
3276 
3277     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
3278     PetscCall(ISDestroy(&iscol_o));
3279   }
3280   PetscFunctionReturn(0);
3281 }
3282 
3283 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3284 {
3285   IS             iscol_local=NULL,isrow_d;
3286   PetscInt       csize;
3287   PetscInt       n,i,j,start,end;
3288   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3289   MPI_Comm       comm;
3290 
3291   PetscFunctionBegin;
3292   /* If isrow has same processor distribution as mat,
3293      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3294   if (call == MAT_REUSE_MATRIX) {
3295     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3296     if (isrow_d) {
3297       sameRowDist  = PETSC_TRUE;
3298       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3299     } else {
3300       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3301       if (iscol_local) {
3302         sameRowDist  = PETSC_TRUE;
3303         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3304       }
3305     }
3306   } else {
3307     /* Check if isrow has same processor distribution as mat */
3308     sameDist[0] = PETSC_FALSE;
3309     PetscCall(ISGetLocalSize(isrow,&n));
3310     if (!n) {
3311       sameDist[0] = PETSC_TRUE;
3312     } else {
3313       PetscCall(ISGetMinMax(isrow,&i,&j));
3314       PetscCall(MatGetOwnershipRange(mat,&start,&end));
3315       if (i >= start && j < end) {
3316         sameDist[0] = PETSC_TRUE;
3317       }
3318     }
3319 
3320     /* Check if iscol has same processor distribution as mat */
3321     sameDist[1] = PETSC_FALSE;
3322     PetscCall(ISGetLocalSize(iscol,&n));
3323     if (!n) {
3324       sameDist[1] = PETSC_TRUE;
3325     } else {
3326       PetscCall(ISGetMinMax(iscol,&i,&j));
3327       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
3328       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3329     }
3330 
3331     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3332     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
3333     sameRowDist = tsameDist[0];
3334   }
3335 
3336   if (sameRowDist) {
3337     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3338       /* isrow and iscol have same processor distribution as mat */
3339       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
3340       PetscFunctionReturn(0);
3341     } else { /* sameRowDist */
3342       /* isrow has same processor distribution as mat */
3343       if (call == MAT_INITIAL_MATRIX) {
3344         PetscBool sorted;
3345         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3346         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
3347         PetscCall(ISGetSize(iscol,&i));
3348         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3349 
3350         PetscCall(ISSorted(iscol_local,&sorted));
3351         if (sorted) {
3352           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3353           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
3354           PetscFunctionReturn(0);
3355         }
3356       } else { /* call == MAT_REUSE_MATRIX */
3357         IS iscol_sub;
3358         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3359         if (iscol_sub) {
3360           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
3361           PetscFunctionReturn(0);
3362         }
3363       }
3364     }
3365   }
3366 
3367   /* General case: iscol -> iscol_local which has global size of iscol */
3368   if (call == MAT_REUSE_MATRIX) {
3369     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
3370     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3371   } else {
3372     if (!iscol_local) {
3373       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3374     }
3375   }
3376 
3377   PetscCall(ISGetLocalSize(iscol,&csize));
3378   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
3379 
3380   if (call == MAT_INITIAL_MATRIX) {
3381     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
3382     PetscCall(ISDestroy(&iscol_local));
3383   }
3384   PetscFunctionReturn(0);
3385 }
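
/*
   Illustrative usage sketch (not part of the original source): extracting a parallel submatrix
   through the dispatcher above. Keeping every second locally owned row and all locally owned
   columns leaves both index sets distributed like mat, so the SameRowColDist path is taken.
   The helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExampleSubMatrix(Mat mat,Mat *sub)
{
  PetscInt rstart,rend,cstart,cend;
  IS       isrow,iscol;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(mat,&rstart,&rend));
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
  PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),(rend-rstart)/2,rstart,2,&isrow));
  PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),cend-cstart,cstart,1,&iscol));
  PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,sub));
  /* with MAT_REUSE_MATRIX and the same index sets, *sub can later be refreshed after mat changes */
  PetscCall(ISDestroy(&isrow));
  PetscCall(ISDestroy(&iscol));
  PetscFunctionReturn(0);
}
#endif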
3386 
3387 /*@C
3388      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3389          and "off-diagonal" part of the matrix in CSR format.
3390 
3391    Collective
3392 
3393    Input Parameters:
3394 +  comm - MPI communicator
3395 .  A - "diagonal" portion of matrix
3396 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3397 -  garray - global index of B columns
3398 
3399    Output Parameter:
3400 .   mat - the matrix, with input A as its local diagonal matrix
3401    Level: advanced
3402 
3403    Notes:
3404        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3405        A becomes part of the output mat and B is destroyed by this routine, so the user cannot use A or B afterwards.
3406 
3407 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3408 @*/
3409 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3410 {
3411   Mat_MPIAIJ        *maij;
3412   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3413   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3414   const PetscScalar *oa;
3415   Mat               Bnew;
3416   PetscInt          m,n,N;
3417 
3418   PetscFunctionBegin;
3419   PetscCall(MatCreate(comm,mat));
3420   PetscCall(MatGetSize(A,&m,&n));
3421   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3422   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3423   /* the check below is removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3424   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3425 
3426   /* Get global columns of mat */
3427   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3428 
3429   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
3430   PetscCall(MatSetType(*mat,MATMPIAIJ));
3431   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3432   maij = (Mat_MPIAIJ*)(*mat)->data;
3433 
3434   (*mat)->preallocated = PETSC_TRUE;
3435 
3436   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3437   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3438 
3439   /* Set A as diagonal portion of *mat */
3440   maij->A = A;
3441 
3442   nz = oi[m];
3443   for (i=0; i<nz; i++) {
3444     col   = oj[i];
3445     oj[i] = garray[col];
3446   }
3447 
3448   /* Set Bnew as off-diagonal portion of *mat */
3449   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
3450   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
3451   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3452   bnew        = (Mat_SeqAIJ*)Bnew->data;
3453   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3454   maij->B     = Bnew;
3455 
3456   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3457 
3458   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3459   b->free_a       = PETSC_FALSE;
3460   b->free_ij      = PETSC_FALSE;
3461   PetscCall(MatDestroy(&B));
3462 
3463   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3464   bnew->free_a       = PETSC_TRUE;
3465   bnew->free_ij      = PETSC_TRUE;
3466 
3467   /* condense columns of maij->B */
3468   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3469   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3470   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3471   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
3472   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3473   PetscFunctionReturn(0);
3474 }
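
/*
   Illustrative usage sketch (not part of the original source): building an MPIAIJ matrix from
   local SeqAIJ pieces with the routine above. The sketch assumes exactly two MPI ranks, each
   owning two rows and two columns of a 4x4 matrix, with a single off-process coupling column
   per rank; all names and values are hypothetical. Note that A and B are taken over (B is
   destroyed), and that B uses local column numbering translated through garray.
*/
#if 0
static PetscErrorCode ExampleWithSeqAIJ(MPI_Comm comm,Mat *mat)
{
  Mat         A,B;
  PetscMPIInt rank;
  PetscInt    row,col,garray[1];
  PetscScalar v;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  /* diagonal block: 2x2 identity on each rank */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,1,NULL,&A));
  for (row=0; row<2; row++) {
    v = 1.0;
    PetscCall(MatSetValues(A,1,&row,1,&row,&v,INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  /* off-diagonal block: one coupling column, stored with the LOCAL column index 0 */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&B));
  row = 0; col = 0; v = -1.0;
  PetscCall(MatSetValues(B,1,&row,1,&col,&v,INSERT_VALUES));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  garray[0] = rank ? 1 : 2; /* global index of B's single column */
  PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,mat)); /* A and B now belong to *mat */
  PetscFunctionReturn(0);
}
#endif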
3475 
3476 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3477 
3478 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3479 {
3480   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3481   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3482   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3483   Mat            M,Msub,B=a->B;
3484   MatScalar      *aa;
3485   Mat_SeqAIJ     *aij;
3486   PetscInt       *garray = a->garray,*colsub,Ncols;
3487   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3488   IS             iscol_sub,iscmap;
3489   const PetscInt *is_idx,*cmap;
3490   PetscBool      allcolumns=PETSC_FALSE;
3491   MPI_Comm       comm;
3492 
3493   PetscFunctionBegin;
3494   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3495   if (call == MAT_REUSE_MATRIX) {
3496     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3497     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3498     PetscCall(ISGetLocalSize(iscol_sub,&count));
3499 
3500     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3501     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3502 
3503     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3504     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3505 
3506     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3507 
3508   } else { /* call == MAT_INITIAL_MATRIX */
3509     PetscBool flg;
3510 
3511     PetscCall(ISGetLocalSize(iscol,&n));
3512     PetscCall(ISGetSize(iscol,&Ncols));
3513 
3514     /* (1) iscol -> nonscalable iscol_local */
3515     /* Check for special case: each processor gets entire matrix columns */
3516     PetscCall(ISIdentity(iscol_local,&flg));
3517     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3518     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3519     if (allcolumns) {
3520       iscol_sub = iscol_local;
3521       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3522       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3523 
3524     } else {
3525       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3526       PetscInt *idx,*cmap1,k;
3527       PetscCall(PetscMalloc1(Ncols,&idx));
3528       PetscCall(PetscMalloc1(Ncols,&cmap1));
3529       PetscCall(ISGetIndices(iscol_local,&is_idx));
3530       count = 0;
3531       k     = 0;
3532       for (i=0; i<Ncols; i++) {
3533         j = is_idx[i];
3534         if (j >= cstart && j < cend) {
3535           /* diagonal part of mat */
3536           idx[count]     = j;
3537           cmap1[count++] = i; /* column index in submat */
3538         } else if (Bn) {
3539           /* off-diagonal part of mat */
3540           if (j == garray[k]) {
3541             idx[count]     = j;
3542             cmap1[count++] = i;  /* column index in submat */
3543           } else if (j > garray[k]) {
3544             while (j > garray[k] && k < Bn-1) k++;
3545             if (j == garray[k]) {
3546               idx[count]     = j;
3547               cmap1[count++] = i; /* column index in submat */
3548             }
3549           }
3550         }
3551       }
3552       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3553 
3554       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3555       PetscCall(ISGetBlockSize(iscol,&cbs));
3556       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3557 
3558       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3559     }
3560 
3561     /* (3) Create sequential Msub */
3562     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3563   }
3564 
3565   PetscCall(ISGetLocalSize(iscol_sub,&count));
3566   aij  = (Mat_SeqAIJ*)(Msub)->data;
3567   ii   = aij->i;
3568   PetscCall(ISGetIndices(iscmap,&cmap));
3569 
3570   /*
3571       m - number of local rows
3572       Ncols - number of columns (same on all processors)
3573       rstart - first row in new global matrix generated
3574   */
3575   PetscCall(MatGetSize(Msub,&m,NULL));
3576 
3577   if (call == MAT_INITIAL_MATRIX) {
3578     /* (4) Create parallel newmat */
3579     PetscMPIInt    rank,size;
3580     PetscInt       csize;
3581 
3582     PetscCallMPI(MPI_Comm_size(comm,&size));
3583     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3584 
3585     /*
3586         Determine the number of non-zeros in the diagonal and off-diagonal
3587         portions of the matrix in order to do correct preallocation
3588     */
3589 
3590     /* first get start and end of "diagonal" columns */
3591     PetscCall(ISGetLocalSize(iscol,&csize));
3592     if (csize == PETSC_DECIDE) {
3593       PetscCall(ISGetSize(isrow,&mglobal));
3594       if (mglobal == Ncols) { /* square matrix */
3595         nlocal = m;
3596       } else {
3597         nlocal = Ncols/size + ((Ncols % size) > rank);
3598       }
3599     } else {
3600       nlocal = csize;
3601     }
3602     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3603     rstart = rend - nlocal;
3604     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3605 
3606     /* next, compute all the lengths */
3607     jj    = aij->j;
3608     PetscCall(PetscMalloc1(2*m+1,&dlens));
3609     olens = dlens + m;
3610     for (i=0; i<m; i++) {
3611       jend = ii[i+1] - ii[i];
3612       olen = 0;
3613       dlen = 0;
3614       for (j=0; j<jend; j++) {
3615         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3616         else dlen++;
3617         jj++;
3618       }
3619       olens[i] = olen;
3620       dlens[i] = dlen;
3621     }
3622 
3623     PetscCall(ISGetBlockSize(isrow,&bs));
3624     PetscCall(ISGetBlockSize(iscol,&cbs));
3625 
3626     PetscCall(MatCreate(comm,&M));
3627     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3628     PetscCall(MatSetBlockSizes(M,bs,cbs));
3629     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3630     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3631     PetscCall(PetscFree(dlens));
3632 
3633   } else { /* call == MAT_REUSE_MATRIX */
3634     M    = *newmat;
3635     PetscCall(MatGetLocalSize(M,&i,NULL));
3636     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3637     PetscCall(MatZeroEntries(M));
3638     /*
3639          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3640        rather than the slower MatSetValues().
3641     */
3642     M->was_assembled = PETSC_TRUE;
3643     M->assembled     = PETSC_FALSE;
3644   }
3645 
3646   /* (5) Set values of Msub to *newmat */
3647   PetscCall(PetscMalloc1(count,&colsub));
3648   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3649 
3650   jj   = aij->j;
3651   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3652   for (i=0; i<m; i++) {
3653     row = rstart + i;
3654     nz  = ii[i+1] - ii[i];
3655     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3656     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3657     jj += nz; aa += nz;
3658   }
3659   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3660   PetscCall(ISRestoreIndices(iscmap,&cmap));
3661 
3662   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3663   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3664 
3665   PetscCall(PetscFree(colsub));
3666 
3667   /* save Msub, iscol_sub and iscmap used in processor for next request */
3668   if (call == MAT_INITIAL_MATRIX) {
3669     *newmat = M;
3670     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3671     PetscCall(MatDestroy(&Msub));
3672 
3673     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3674     PetscCall(ISDestroy(&iscol_sub));
3675 
3676     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3677     PetscCall(ISDestroy(&iscmap));
3678 
3679     if (iscol_local) {
3680       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3681       PetscCall(ISDestroy(&iscol_local));
3682     }
3683   }
3684   PetscFunctionReturn(0);
3685 }
3686 
3687 /*
3688     Not great since it makes two copies of the submatrix: first a SeqAIJ
3689   on each process, and then the end result by concatenating the local matrices.
3690   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3691 
3692   Note: This requires a sequential iscol with all indices.
3693 */
3694 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3695 {
3696   PetscMPIInt    rank,size;
3697   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3698   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3699   Mat            M,Mreuse;
3700   MatScalar      *aa,*vwork;
3701   MPI_Comm       comm;
3702   Mat_SeqAIJ     *aij;
3703   PetscBool      colflag,allcolumns=PETSC_FALSE;
3704 
3705   PetscFunctionBegin;
3706   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3707   PetscCallMPI(MPI_Comm_rank(comm,&rank));
3708   PetscCallMPI(MPI_Comm_size(comm,&size));
3709 
3710   /* Check for special case: each processor gets entire matrix columns */
3711   PetscCall(ISIdentity(iscol,&colflag));
3712   PetscCall(ISGetLocalSize(iscol,&n));
3713   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3714   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3715 
3716   if (call ==  MAT_REUSE_MATRIX) {
3717     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3718     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3719     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3720   } else {
3721     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3722   }
3723 
3724   /*
3725       m - number of local rows
3726       n - number of columns (same on all processors)
3727       rstart - first row in new global matrix generated
3728   */
3729   PetscCall(MatGetSize(Mreuse,&m,&n));
3730   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3731   if (call == MAT_INITIAL_MATRIX) {
3732     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3733     ii  = aij->i;
3734     jj  = aij->j;
3735 
3736     /*
3737         Determine the number of non-zeros in the diagonal and off-diagonal
3738         portions of the matrix in order to do correct preallocation
3739     */
3740 
3741     /* first get start and end of "diagonal" columns */
3742     if (csize == PETSC_DECIDE) {
3743       PetscCall(ISGetSize(isrow,&mglobal));
3744       if (mglobal == n) { /* square matrix */
3745         nlocal = m;
3746       } else {
3747         nlocal = n/size + ((n % size) > rank);
3748       }
3749     } else {
3750       nlocal = csize;
3751     }
3752     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3753     rstart = rend - nlocal;
3754     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3755 
3756     /* next, compute all the lengths */
3757     PetscCall(PetscMalloc1(2*m+1,&dlens));
3758     olens = dlens + m;
3759     for (i=0; i<m; i++) {
3760       jend = ii[i+1] - ii[i];
3761       olen = 0;
3762       dlen = 0;
3763       for (j=0; j<jend; j++) {
3764         if (*jj < rstart || *jj >= rend) olen++;
3765         else dlen++;
3766         jj++;
3767       }
3768       olens[i] = olen;
3769       dlens[i] = dlen;
3770     }
3771     PetscCall(MatCreate(comm,&M));
3772     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3773     PetscCall(MatSetBlockSizes(M,bs,cbs));
3774     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3775     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3776     PetscCall(PetscFree(dlens));
3777   } else {
3778     PetscInt ml,nl;
3779 
3780     M    = *newmat;
3781     PetscCall(MatGetLocalSize(M,&ml,&nl));
3782     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3783     PetscCall(MatZeroEntries(M));
3784     /*
3785          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3786        rather than the slower MatSetValues().
3787     */
3788     M->was_assembled = PETSC_TRUE;
3789     M->assembled     = PETSC_FALSE;
3790   }
3791   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3792   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3793   ii   = aij->i;
3794   jj   = aij->j;
3795 
3796   /* trigger copy to CPU if needed */
3797   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3798   for (i=0; i<m; i++) {
3799     row   = rstart + i;
3800     nz    = ii[i+1] - ii[i];
3801     cwork = jj; jj += nz;
3802     vwork = aa; aa += nz;
3803     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3804   }
3805   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3806 
3807   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3808   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3809   *newmat = M;
3810 
3811   /* save submatrix used in processor for next request */
3812   if (call ==  MAT_INITIAL_MATRIX) {
3813     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3814     PetscCall(MatDestroy(&Mreuse));
3815   }
3816   PetscFunctionReturn(0);
3817 }
3818 
3819 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3820 {
3821   PetscInt       m,cstart, cend,j,nnz,i,d;
3822   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3823   const PetscInt *JJ;
3824   PetscBool      nooffprocentries;
3825 
3826   PetscFunctionBegin;
3827   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3828 
3829   PetscCall(PetscLayoutSetUp(B->rmap));
3830   PetscCall(PetscLayoutSetUp(B->cmap));
3831   m      = B->rmap->n;
3832   cstart = B->cmap->rstart;
3833   cend   = B->cmap->rend;
3834   rstart = B->rmap->rstart;
3835 
3836   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3837 
3838   if (PetscDefined(USE_DEBUG)) {
3839     for (i=0; i<m; i++) {
3840       nnz = Ii[i+1]- Ii[i];
3841       JJ  = J + Ii[i];
3842       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3843       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3844       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3845     }
3846   }
3847 
3848   for (i=0; i<m; i++) {
3849     nnz     = Ii[i+1]- Ii[i];
3850     JJ      = J + Ii[i];
3851     nnz_max = PetscMax(nnz_max,nnz);
3852     d       = 0;
3853     for (j=0; j<nnz; j++) {
3854       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3855     }
3856     d_nnz[i] = d;
3857     o_nnz[i] = nnz - d;
3858   }
3859   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3860   PetscCall(PetscFree2(d_nnz,o_nnz));
3861 
3862   for (i=0; i<m; i++) {
3863     ii   = i + rstart;
3864     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3865   }
3866   nooffprocentries    = B->nooffprocentries;
3867   B->nooffprocentries = PETSC_TRUE;
3868   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3869   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3870   B->nooffprocentries = nooffprocentries;
3871 
3872   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 /*@
3877    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3878    (the default parallel PETSc format).
3879 
3880    Collective
3881 
3882    Input Parameters:
3883 +  B - the matrix
3884 .  i - the indices into j for the start of each local row (starts with zero)
3885 .  j - the column indices for each local row (starts with zero)
3886 -  v - optional values in the matrix
3887 
3888    Level: developer
3889 
3890    Notes:
3891        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3892      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3893      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3894 
3895        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3896 
3897        The format used for the sparse matrix input is equivalent to a
3898     row-major ordering, i.e., for the following matrix, the input data expected is
3899     as shown
3900 
3901 $        1 0 0
3902 $        2 0 3     P0
3903 $       -------
3904 $        4 5 6     P1
3905 $
3906 $     Process0 [P0]: rows_owned=[0,1]
3907 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3908 $        j =  {0,0,2}  [size = 3]
3909 $        v =  {1,2,3}  [size = 3]
3910 $
3911 $     Process1 [P1]: rows_owned=[2]
3912 $        i =  {0,3}    [size = nrow+1  = 1+1]
3913 $        j =  {0,1,2}  [size = 3]
3914 $        v =  {4,5,6}  [size = 3]
3915 
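       A minimal calling sketch for the two-process example above (each rank passes its own
    local arrays; error checking omitted):

.vb
     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,nrows_local,PETSC_DECIDE,3,3);   /* nrows_local = 2 on P0, 1 on P1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);         /* i,j,v are copied and may be freed afterwards */
.ve
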
3916 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3917           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3918 @*/
3919 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3920 {
3921   PetscFunctionBegin;
3922   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 /*@C
3927    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3928    (the default parallel PETSc format).  For good matrix assembly performance
3929    the user should preallocate the matrix storage by setting the parameters
3930    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3931    performance can be increased by more than a factor of 50.
3932 
3933    Collective
3934 
3935    Input Parameters:
3936 +  B - the matrix
3937 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3938            (same value is used for all local rows)
3939 .  d_nnz - array containing the number of nonzeros in the various rows of the
3940            DIAGONAL portion of the local submatrix (possibly different for each row)
3941            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3942            The size of this array is equal to the number of local rows, i.e 'm'.
3943            For matrices that will be factored, you must leave room for (and set)
3944            the diagonal entry even if it is zero.
3945 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3946            submatrix (same value is used for all local rows).
3947 -  o_nnz - array containing the number of nonzeros in the various rows of the
3948            OFF-DIAGONAL portion of the local submatrix (possibly different for
3949            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3950            structure. The size of this array is equal to the number
3951            of local rows, i.e 'm'.
3952 
3953    If the *_nnz parameter is given then the *_nz parameter is ignored
3954 
3955    The AIJ format (also called the Yale sparse matrix format or
3956    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3957    storage.  The stored row and column indices begin with zero.
3958    See Users-Manual: ch_mat for details.
3959 
3960    The parallel matrix is partitioned such that the first m0 rows belong to
3961    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3962    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3963 
3964    The DIAGONAL portion of the local submatrix of a processor can be defined
3965    as the submatrix obtained by extracting the part corresponding to
3966    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3967    first row that belongs to the processor, r2 is the last row belonging to
3968    this processor, and c1-c2 is the range of indices of the local part of a
3969    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3970    common case of a square matrix, the row and column ranges are the same and
3971    the DIAGONAL part is also square. The remaining portion of the local
3972    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3973 
3974    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3975 
3976    You can call MatGetInfo() to get information on how effective the preallocation was;
3977    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3978    You can also run with the option -info and look for messages with the string
3979    malloc in them to see if additional memory allocation was needed.
3980 
3981    Example usage:
3982 
3983    Consider the following 8x8 matrix with 34 non-zero values, that is
3984    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3985    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3986    as follows:
3987 
3988 .vb
3989             1  2  0  |  0  3  0  |  0  4
3990     Proc0   0  5  6  |  7  0  0  |  8  0
3991             9  0 10  | 11  0  0  | 12  0
3992     -------------------------------------
3993            13  0 14  | 15 16 17  |  0  0
3994     Proc1   0 18  0  | 19 20 21  |  0  0
3995             0  0  0  | 22 23  0  | 24  0
3996     -------------------------------------
3997     Proc2  25 26 27  |  0  0 28  | 29  0
3998            30  0  0  | 31 32 33  |  0 34
3999 .ve
4000 
4001    This can be represented as a collection of submatrices as:
4002 
4003 .vb
4004       A B C
4005       D E F
4006       G H I
4007 .ve
4008 
4009    Where the submatrices A,B,C are owned by proc0, D,E,F are
4010    owned by proc1, G,H,I are owned by proc2.
4011 
4012    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4013    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4014    The 'M','N' parameters are 8,8, and have the same values on all procs.
4015 
4016    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4017    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4018    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4019    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4020    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4021    matrix, and [DF] as another SeqAIJ matrix.
4022 
4023    When d_nz, o_nz parameters are specified, d_nz storage elements are
4024    allocated for every row of the local diagonal submatrix, and o_nz
4025    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4026    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4027    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4028    In this case, the values of d_nz,o_nz are:
4029 .vb
4030      proc0 : d_nz = 2, o_nz = 2
4031      proc1 : d_nz = 3, o_nz = 2
4032      proc2 : d_nz = 1, o_nz = 4
4033 .ve
4034    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4035    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4036    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4037    34 values.
4038 
4039    When d_nnz, o_nnz parameters are specified, the storage is specified
4040    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4041    In the above case the values for d_nnz,o_nnz are:
4042 .vb
4043      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4044      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4045      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4046 .ve
4047    Here the space allocated is the sum of all the above values, i.e., 34, and
4048    hence the preallocation is exact.
4049 
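   As a sketch, the per-row variant for proc0 in the example above could look like
   (B is assumed to already be a MATMPIAIJ with local sizes 3 x 3):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);   /* d_nz and o_nz are ignored when the arrays are given */
.ve
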
4050    Level: intermediate
4051 
4052 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4053           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4054 @*/
4055 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4056 {
4057   PetscFunctionBegin;
4058   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4059   PetscValidType(B,1);
4060   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4061   PetscFunctionReturn(0);
4062 }
4063 
4064 /*@
4065      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4066          rows in standard CSR format.
4067 
4068    Collective
4069 
4070    Input Parameters:
4071 +  comm - MPI communicator
4072 .  m - number of local rows (Cannot be PETSC_DECIDE)
4073 .  n - This value should be the same as the local size used in creating the
4074        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4075        calculated if N is given) For square matrices n is almost always m.
4076 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4077 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4078 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4079 .   j - column indices
4080 -   a - matrix values
4081 
4082    Output Parameter:
4083 .   mat - the matrix
4084 
4085    Level: intermediate
4086 
4087    Notes:
4088        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4089      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4090      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4091 
4092        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4093 
4094        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4095 
4096        The format used for the sparse matrix input is equivalent to a
4097     row-major ordering, i.e., for the following matrix, the input data expected is
4098     as shown
4099 
4100 $        1 0 0
4101 $        2 0 3     P0
4102 $       -------
4103 $        4 5 6     P1
4104 $
4105 $     Process0 [P0]: rows_owned=[0,1]
4106 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4107 $        j =  {0,0,2}  [size = 3]
4108 $        a =  {1,2,3}  [size = 3]
4109 $
4110 $     Process1 [P1]: rows_owned=[2]
4111 $        i =  {0,3}    [size = nrow+1  = 1+1]
4112 $        j =  {0,1,2}  [size = 3]
4113 $        a =  {4,5,6}  [size = 3]
4114 
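    A minimal calling sketch for process P0 in the example above (P1 passes its own m and
    arrays; error checking omitted):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
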
4115 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4116           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4117 @*/
4118 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4119 {
4120   PetscFunctionBegin;
4121   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4122   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4123   PetscCall(MatCreate(comm,mat));
4124   PetscCall(MatSetSizes(*mat,m,n,M,N));
4125   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4126   PetscCall(MatSetType(*mat,MATMPIAIJ));
4127   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4128   PetscFunctionReturn(0);
4129 }
4130 
4131 /*@
4132      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4133          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4134 
4135    Collective
4136 
4137    Input Parameters:
4138 +  mat - the matrix
4139 .  m - number of local rows (Cannot be PETSC_DECIDE)
4140 .  n - This value should be the same as the local size used in creating the
4141        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4142        calculated if N is given) For square matrices n is almost always m.
4143 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4144 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4145 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4146 .  J - column indices
4147 -  v - matrix values
4148 
4149    Level: intermediate
4150 
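   Notes:
     A sketch of the intended create-then-update pattern (i and j keep the same nonzero
     pattern; only the values in v change between calls):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,v,&A);
     /* ... modify the entries of v, keeping i and j unchanged ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,v);
.ve
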
4151 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4152           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4153 @*/
4154 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4155 {
4156   PetscInt       cstart,nnz,i,j;
4157   PetscInt       *ld;
4158   PetscBool      nooffprocentries;
4159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4160   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4161   PetscScalar    *ad,*ao;
4162   const PetscInt *Adi = Ad->i;
4163   PetscInt       ldi,Iii,md;
4164 
4165   PetscFunctionBegin;
4166   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4167   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4168   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4169   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4170 
4171   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4172   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4173   cstart = mat->cmap->rstart;
4174   if (!Aij->ld) {
4175     /* count number of entries below block diagonal */
4176     PetscCall(PetscCalloc1(m,&ld));
4177     Aij->ld = ld;
4178     for (i=0; i<m; i++) {
4179       nnz  = Ii[i+1]- Ii[i];
4180       j     = 0;
4181       while (j < nnz && J[j] < cstart) {j++;} /* test j before reading J[j] so we never read past the end of the row */
4182       J    += nnz;
4183       ld[i] = j;
4184     }
4185   } else {
4186     ld = Aij->ld;
4187   }
4188 
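  /* Copy each CSR row of v into the two local blocks: the first ld[i] entries of the row lie
     to the left of the diagonal block (off-diagonal matrix B), the next md entries belong to
     the diagonal block (matrix A), and the remaining entries lie to the right of the diagonal block */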
4189   for (i=0; i<m; i++) {
4190     nnz  = Ii[i+1]- Ii[i];
4191     Iii  = Ii[i];
4192     ldi  = ld[i];
4193     md   = Adi[i+1]-Adi[i];
4194     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4195     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4196     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4197     ad  += md;
4198     ao  += nnz - md;
4199   }
4200   nooffprocentries      = mat->nooffprocentries;
4201   mat->nooffprocentries = PETSC_TRUE;
4202   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4203   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4204   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4205   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4206   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4207   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4208   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4209   mat->nooffprocentries = nooffprocentries;
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 /*@C
4214    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4215    (the default parallel PETSc format).  For good matrix assembly performance
4216    the user should preallocate the matrix storage by setting the parameters
4217    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4218    performance can be increased by more than a factor of 50.
4219 
4220    Collective
4221 
4222    Input Parameters:
4223 +  comm - MPI communicator
4224 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4225            This value should be the same as the local size used in creating the
4226            y vector for the matrix-vector product y = Ax.
4227 .  n - This value should be the same as the local size used in creating the
4228        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4229        calculated if N is given) For square matrices n is almost always m.
4230 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4231 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4232 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4233            (same value is used for all local rows)
4234 .  d_nnz - array containing the number of nonzeros in the various rows of the
4235            DIAGONAL portion of the local submatrix (possibly different for each row)
4236            or NULL, if d_nz is used to specify the nonzero structure.
4237            The size of this array is equal to the number of local rows, i.e 'm'.
4238 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4239            submatrix (same value is used for all local rows).
4240 -  o_nnz - array containing the number of nonzeros in the various rows of the
4241            OFF-DIAGONAL portion of the local submatrix (possibly different for
4242            each row) or NULL, if o_nz is used to specify the nonzero
4243            structure. The size of this array is equal to the number
4244            of local rows, i.e 'm'.
4245 
4246    Output Parameter:
4247 .  A - the matrix
4248 
4249    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4250    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4251    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4252 
4253    Notes:
4254    If the *_nnz parameter is given then the *_nz parameter is ignored
4255 
4256    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4257    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4258    storage requirements for this matrix.
4259 
4260    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4261    processor then it must be used on all processors that share the object for
4262    that argument.
4263 
4264    The user MUST specify either the local or global matrix dimensions
4265    (possibly both).
4266 
4267    The parallel matrix is partitioned across processors such that the
4268    first m0 rows belong to process 0, the next m1 rows belong to
4269    process 1, the next m2 rows belong to process 2 etc.. where
4270    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4271    values corresponding to an [m x N] submatrix.
4272 
4273    The columns are logically partitioned with the n0 columns belonging
4274    to 0th partition, the next n1 columns belonging to the next
4275    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4276 
4277    The DIAGONAL portion of the local submatrix on any given processor
4278    is the submatrix corresponding to the rows and columns m,n
4279    corresponding to the given processor, i.e., the diagonal matrix on
4280    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4281    etc. The remaining portion of the local submatrix [m x (N-n)]
4282    constitutes the OFF-DIAGONAL portion. The example below better
4283    illustrates this concept.
4284 
4285    For a square global matrix we define each processor's diagonal portion
4286    to be its local rows and the corresponding columns (a square submatrix);
4287    each processor's off-diagonal portion encompasses the remainder of the
4288    local matrix (a rectangular submatrix).
4289 
4290    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4291 
4292    When calling this routine with a single process communicator, a matrix of
4293    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4294    type of communicator, use the construction mechanism
4295 .vb
4296      MatCreate(...,&A);
4297      MatSetType(A,MATMPIAIJ);
4298      MatSetSizes(A, m,n,M,N);
4299      MatMPIAIJSetPreallocation(A,...);
4300 .ve
4301 
4304    By default, this format uses inodes (identical nodes) when possible.
4305    We search for consecutive rows with the same nonzero structure, thereby
4306    reusing matrix information to achieve increased efficiency.
4307 
4308    Options Database Keys:
4309 +  -mat_no_inode  - Do not use inodes
4310 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4311 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4312         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4313         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4314 
4315    Example usage:
4316 
4317    Consider the following 8x8 matrix with 34 non-zero values, that is
4318    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4319    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4320    as follows
4321 
4322 .vb
4323             1  2  0  |  0  3  0  |  0  4
4324     Proc0   0  5  6  |  7  0  0  |  8  0
4325             9  0 10  | 11  0  0  | 12  0
4326     -------------------------------------
4327            13  0 14  | 15 16 17  |  0  0
4328     Proc1   0 18  0  | 19 20 21  |  0  0
4329             0  0  0  | 22 23  0  | 24  0
4330     -------------------------------------
4331     Proc2  25 26 27  |  0  0 28  | 29  0
4332            30  0  0  | 31 32 33  |  0 34
4333 .ve
4334 
4335    This can be represented as a collection of submatrices as
4336 
4337 .vb
4338       A B C
4339       D E F
4340       G H I
4341 .ve
4342 
4343    Where the submatrices A,B,C are owned by proc0, D,E,F are
4344    owned by proc1, G,H,I are owned by proc2.
4345 
4346    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4347    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4348    The 'M','N' parameters are 8,8, and have the same values on all procs.
4349 
4350    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4351    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4352    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4353    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4354    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4355    matrix, and [DF] as another SeqAIJ matrix.
4356 
4357    When d_nz, o_nz parameters are specified, d_nz storage elements are
4358    allocated for every row of the local diagonal submatrix, and o_nz
4359    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4360    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4361    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4362    In this case, the values of d_nz,o_nz are
4363 .vb
4364      proc0 : dnz = 2, o_nz = 2
4365      proc1 : dnz = 3, o_nz = 2
4366      proc2 : dnz = 1, o_nz = 4
4367 .ve
4368    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4369    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4370    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4371    34 values.
4372 
4373    When d_nnz, o_nnz parameters are specified, the storage is specified
4374    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4375    In the above case the values for d_nnz,o_nnz are
4376 .vb
4377      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4378      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4379      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4380 .ve
4381    Here the space allocated is the sum of all the above values, i.e., 34, and
4382    hence the preallocation is exact.
4383 
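   As a sketch, the call for proc0 in the example above could be (each rank passes its own
   local sizes and nnz arrays):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
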
4384    Level: intermediate
4385 
4386 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4387           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4388 @*/
4389 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4390 {
4391   PetscMPIInt    size;
4392 
4393   PetscFunctionBegin;
4394   PetscCall(MatCreate(comm,A));
4395   PetscCall(MatSetSizes(*A,m,n,M,N));
4396   PetscCallMPI(MPI_Comm_size(comm,&size));
4397   if (size > 1) {
4398     PetscCall(MatSetType(*A,MATMPIAIJ));
4399     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4400   } else {
4401     PetscCall(MatSetType(*A,MATSEQAIJ));
4402     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4403   }
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 /*@C
4408   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4409 
4410   Not collective
4411 
4412   Input Parameter:
4413 . A - The MPIAIJ matrix
4414 
4415   Output Parameters:
4416 + Ad - The local diagonal block as a SeqAIJ matrix
4417 . Ao - The local off-diagonal block as a SeqAIJ matrix
4418 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4419 
4420   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4421   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4422   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4423   local column numbers to global column numbers in the original matrix.
4424 
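  A small sketch of the colmap contract (Ad, Ao and colmap are internal to A and must not be
  destroyed by the caller):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column c of Ao corresponds to global column colmap[c] of A */
.ve
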
4425   Level: intermediate
4426 
4427 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4428 @*/
4429 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4430 {
4431   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4432   PetscBool      flg;
4433 
4434   PetscFunctionBegin;
4435   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4436   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4437   if (Ad)     *Ad     = a->A;
4438   if (Ao)     *Ao     = a->B;
4439   if (colmap) *colmap = a->garray;
4440   PetscFunctionReturn(0);
4441 }
4442 
4443 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4444 {
4445   PetscInt       m,N,i,rstart,nnz,Ii;
4446   PetscInt       *indx;
4447   PetscScalar    *values;
4448   MatType        rootType;
4449 
4450   PetscFunctionBegin;
4451   PetscCall(MatGetSize(inmat,&m,&N));
4452   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4453     PetscInt       *dnz,*onz,sum,bs,cbs;
4454 
4455     if (n == PETSC_DECIDE) {
4456       PetscCall(PetscSplitOwnership(comm,&n,&N));
4457     }
4458     /* Check sum(n) = N */
4459     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4460     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4461 
4462     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4463     rstart -= m;
4464 
4465     MatPreallocateBegin(comm,m,n,dnz,onz);
4466     for (i=0; i<m; i++) {
4467       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4468       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4469       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4470     }
4471 
4472     PetscCall(MatCreate(comm,outmat));
4473     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4474     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4475     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4476     PetscCall(MatGetRootType_Private(inmat,&rootType));
4477     PetscCall(MatSetType(*outmat,rootType));
4478     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4479     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4480     MatPreallocateEnd(dnz,onz);
4481     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4482   }
4483 
4484   /* numeric phase */
4485   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4486   for (i=0; i<m; i++) {
4487     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4488     Ii   = i + rstart;
4489     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4490     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4491   }
4492   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4493   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4498 {
4499   PetscMPIInt       rank;
4500   PetscInt          m,N,i,rstart,nnz;
4501   size_t            len;
4502   const PetscInt    *indx;
4503   PetscViewer       out;
4504   char              *name;
4505   Mat               B;
4506   const PetscScalar *values;
4507 
4508   PetscFunctionBegin;
4509   PetscCall(MatGetLocalSize(A,&m,NULL));
4510   PetscCall(MatGetSize(A,NULL,&N));
4511   /* Should this be the type of the diagonal block of A? */
4512   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4513   PetscCall(MatSetSizes(B,m,N,m,N));
4514   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4515   PetscCall(MatSetType(B,MATSEQAIJ));
4516   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4517   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4518   for (i=0; i<m; i++) {
4519     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4520     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4521     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4522   }
4523   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4524   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4525 
4526   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4527   PetscCall(PetscStrlen(outfile,&len));
4528   PetscCall(PetscMalloc1(len+6,&name));
4529   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4530   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4531   PetscCall(PetscFree(name));
4532   PetscCall(MatView(B,out));
4533   PetscCall(PetscViewerDestroy(&out));
4534   PetscCall(MatDestroy(&B));
4535   PetscFunctionReturn(0);
4536 }
4537 
4538 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4539 {
4540   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4541 
4542   PetscFunctionBegin;
4543   if (!merge) PetscFunctionReturn(0);
4544   PetscCall(PetscFree(merge->id_r));
4545   PetscCall(PetscFree(merge->len_s));
4546   PetscCall(PetscFree(merge->len_r));
4547   PetscCall(PetscFree(merge->bi));
4548   PetscCall(PetscFree(merge->bj));
4549   PetscCall(PetscFree(merge->buf_ri[0]));
4550   PetscCall(PetscFree(merge->buf_ri));
4551   PetscCall(PetscFree(merge->buf_rj[0]));
4552   PetscCall(PetscFree(merge->buf_rj));
4553   PetscCall(PetscFree(merge->coi));
4554   PetscCall(PetscFree(merge->coj));
4555   PetscCall(PetscFree(merge->owners_co));
4556   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4557   PetscCall(PetscFree(merge));
4558   PetscFunctionReturn(0);
4559 }
4560 
4561 #include <../src/mat/utils/freespace.h>
4562 #include <petscbt.h>
4563 
4564 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4565 {
4566   MPI_Comm            comm;
4567   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4568   PetscMPIInt         size,rank,taga,*len_s;
4569   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4570   PetscInt            proc,m;
4571   PetscInt            **buf_ri,**buf_rj;
4572   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4573   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4574   MPI_Request         *s_waits,*r_waits;
4575   MPI_Status          *status;
4576   const MatScalar     *aa,*a_a;
4577   MatScalar           **abuf_r,*ba_i;
4578   Mat_Merge_SeqsToMPI *merge;
4579   PetscContainer      container;
4580 
4581   PetscFunctionBegin;
4582   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
4583   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
4584 
4585   PetscCallMPI(MPI_Comm_size(comm,&size));
4586   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4587 
4588   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
4589   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4590   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
4591   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4592   aa   = a_a;
4593 
4594   bi     = merge->bi;
4595   bj     = merge->bj;
4596   buf_ri = merge->buf_ri;
4597   buf_rj = merge->buf_rj;
4598 
4599   PetscCall(PetscMalloc1(size,&status));
4600   owners = merge->rowmap->range;
4601   len_s  = merge->len_s;
4602 
4603   /* send and recv matrix values */
4604   /*-----------------------------*/
4605   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
4606   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
4607 
4608   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
4609   for (proc=0,k=0; proc<size; proc++) {
4610     if (!len_s[proc]) continue;
4611     i    = owners[proc];
4612     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
4613     k++;
4614   }
4615 
4616   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
4617   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
4618   PetscCall(PetscFree(status));
4619 
4620   PetscCall(PetscFree(s_waits));
4621   PetscCall(PetscFree(r_waits));
4622 
4623   /* insert mat values of mpimat */
4624   /*----------------------------*/
4625   PetscCall(PetscMalloc1(N,&ba_i));
4626   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4627 
4628   for (k=0; k<merge->nrecv; k++) {
4629     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4630     nrows       = *(buf_ri_k[k]);
4631     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4632     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4633   }
4634 
4635   /* set values of ba */
4636   m    = merge->rowmap->n;
4637   for (i=0; i<m; i++) {
4638     arow = owners[rank] + i;
4639     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4640     bnzi = bi[i+1] - bi[i];
4641     PetscCall(PetscArrayzero(ba_i,bnzi));
4642 
4643     /* add local non-zero vals of this proc's seqmat into ba */
4644     anzi   = ai[arow+1] - ai[arow];
4645     aj     = a->j + ai[arow];
4646     aa     = a_a + ai[arow];
4647     nextaj = 0;
4648     for (j=0; nextaj<anzi; j++) {
4649       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4650         ba_i[j] += aa[nextaj++];
4651       }
4652     }
4653 
4654     /* add received vals into ba */
4655     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4656       /* i-th row */
4657       if (i == *nextrow[k]) {
4658         anzi   = *(nextai[k]+1) - *nextai[k];
4659         aj     = buf_rj[k] + *(nextai[k]);
4660         aa     = abuf_r[k] + *(nextai[k]);
4661         nextaj = 0;
4662         for (j=0; nextaj<anzi; j++) {
4663           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4664             ba_i[j] += aa[nextaj++];
4665           }
4666         }
4667         nextrow[k]++; nextai[k]++;
4668       }
4669     }
4670     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
4671   }
4672   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
4673   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
4674   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
4675 
4676   PetscCall(PetscFree(abuf_r[0]));
4677   PetscCall(PetscFree(abuf_r));
4678   PetscCall(PetscFree(ba_i));
4679   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4680   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
4681   PetscFunctionReturn(0);
4682 }
4683 
4684 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4685 {
4686   Mat                 B_mpi;
4687   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4688   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4689   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4690   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4691   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4692   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4693   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4694   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4695   MPI_Status          *status;
4696   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4697   PetscBT             lnkbt;
4698   Mat_Merge_SeqsToMPI *merge;
4699   PetscContainer      container;
4700 
4701   PetscFunctionBegin;
4702   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
4703 
4704   /* make sure it is a PETSc comm */
4705   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
4706   PetscCallMPI(MPI_Comm_size(comm,&size));
4707   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4708 
4709   PetscCall(PetscNew(&merge));
4710   PetscCall(PetscMalloc1(size,&status));
4711 
4712   /* determine row ownership */
4713   /*---------------------------------------------------------*/
4714   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
4715   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
4716   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
4717   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
4718   PetscCall(PetscLayoutSetUp(merge->rowmap));
4719   PetscCall(PetscMalloc1(size,&len_si));
4720   PetscCall(PetscMalloc1(size,&merge->len_s));
4721 
4722   m      = merge->rowmap->n;
4723   owners = merge->rowmap->range;
4724 
4725   /* determine the number of messages to send, their lengths */
4726   /*---------------------------------------------------------*/
4727   len_s = merge->len_s;
4728 
4729   len          = 0; /* length of buf_si[] */
4730   merge->nsend = 0;
4731   for (proc=0; proc<size; proc++) {
4732     len_si[proc] = 0;
4733     if (proc == rank) {
4734       len_s[proc] = 0;
4735     } else {
4736       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4737       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4738     }
4739     if (len_s[proc]) {
4740       merge->nsend++;
4741       nrows = 0;
4742       for (i=owners[proc]; i<owners[proc+1]; i++) {
4743         if (ai[i+1] > ai[i]) nrows++;
4744       }
4745       len_si[proc] = 2*(nrows+1);
4746       len         += len_si[proc];
4747     }
4748   }
4749 
4750   /* determine the number and length of messages to receive for ij-structure */
4751   /*-------------------------------------------------------------------------*/
4752   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
4753   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4754 
4755   /* post the Irecv of j-structure */
4756   /*-------------------------------*/
4757   PetscCall(PetscCommGetNewTag(comm,&tagj));
4758   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
4759 
4760   /* post the Isend of j-structure */
4761   /*--------------------------------*/
4762   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
4763 
4764   for (proc=0, k=0; proc<size; proc++) {
4765     if (!len_s[proc]) continue;
4766     i    = owners[proc];
4767     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
4768     k++;
4769   }
4770 
4771   /* receives and sends of j-structure are complete */
4772   /*------------------------------------------------*/
4773   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
4774   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
4775 
4776   /* send and recv i-structure */
4777   /*---------------------------*/
4778   PetscCall(PetscCommGetNewTag(comm,&tagi));
4779   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
4780 
4781   PetscCall(PetscMalloc1(len+1,&buf_s));
4782   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4783   for (proc=0,k=0; proc<size; proc++) {
4784     if (!len_s[proc]) continue;
4785     /* form outgoing message for i-structure:
4786          buf_si[0]:                 nrows to be sent
4787                [1:nrows]:           row index (global)
4788                [nrows+1:2*nrows+1]: i-structure index
4789     */
4790     /*-------------------------------------------*/
4791     nrows       = len_si[proc]/2 - 1;
4792     buf_si_i    = buf_si + nrows+1;
4793     buf_si[0]   = nrows;
4794     buf_si_i[0] = 0;
4795     nrows       = 0;
4796     for (i=owners[proc]; i<owners[proc+1]; i++) {
4797       anzi = ai[i+1] - ai[i];
4798       if (anzi) {
4799         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4800         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4801         nrows++;
4802       }
4803     }
4804     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
4805     k++;
4806     buf_si += len_si[proc];
4807   }
4808 
4809   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
4810   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
4811 
4812   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
4813   for (i=0; i<merge->nrecv; i++) {
4814     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
4815   }
4816 
4817   PetscCall(PetscFree(len_si));
4818   PetscCall(PetscFree(len_ri));
4819   PetscCall(PetscFree(rj_waits));
4820   PetscCall(PetscFree2(si_waits,sj_waits));
4821   PetscCall(PetscFree(ri_waits));
4822   PetscCall(PetscFree(buf_s));
4823   PetscCall(PetscFree(status));
4824 
4825   /* compute a local seq matrix in each processor */
4826   /*----------------------------------------------*/
4827   /* allocate bi array and free space for accumulating nonzero column info */
4828   PetscCall(PetscMalloc1(m+1,&bi));
4829   bi[0] = 0;
4830 
4831   /* create and initialize a linked list */
4832   nlnk = N+1;
4833   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
4834 
4835   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4836   len  = ai[owners[rank+1]] - ai[owners[rank]];
4837   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
4838 
4839   current_space = free_space;
4840 
4841   /* determine symbolic info for each local row */
4842   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4843 
4844   for (k=0; k<merge->nrecv; k++) {
4845     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4846     nrows       = *buf_ri_k[k];
4847     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4848     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4849   }
4850 
4851   MatPreallocateBegin(comm,m,n,dnz,onz);
4852   len  = 0;
4853   for (i=0; i<m; i++) {
4854     bnzi = 0;
4855     /* add local non-zero cols of this proc's seqmat into lnk */
4856     arow  = owners[rank] + i;
4857     anzi  = ai[arow+1] - ai[arow];
4858     aj    = a->j + ai[arow];
4859     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4860     bnzi += nlnk;
4861     /* add received col data into lnk */
4862     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4863       if (i == *nextrow[k]) { /* i-th row */
4864         anzi  = *(nextai[k]+1) - *nextai[k];
4865         aj    = buf_rj[k] + *nextai[k];
4866         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4867         bnzi += nlnk;
4868         nextrow[k]++; nextai[k]++;
4869       }
4870     }
4871     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4872 
4873     /* if free space is not available, make more free space */
4874     if (current_space->local_remaining<bnzi) {
4875       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
4876       nspacedouble++;
4877     }
4878     /* copy data into free space, then initialize lnk */
4879     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
4880     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4881 
4882     current_space->array           += bnzi;
4883     current_space->local_used      += bnzi;
4884     current_space->local_remaining -= bnzi;
4885 
4886     bi[i+1] = bi[i] + bnzi;
4887   }
4888 
4889   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4890 
4891   PetscCall(PetscMalloc1(bi[m]+1,&bj));
4892   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
4893   PetscCall(PetscLLDestroy(lnk,lnkbt));
4894 
4895   /* create symbolic parallel matrix B_mpi */
4896   /*---------------------------------------*/
4897   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
4898   PetscCall(MatCreate(comm,&B_mpi));
4899   if (n==PETSC_DECIDE) {
4900     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
4901   } else {
4902     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4903   }
4904   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
4905   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
4906   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4907   MatPreallocateEnd(dnz,onz);
4908   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
4909 
4910   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4911   B_mpi->assembled  = PETSC_FALSE;
4912   merge->bi         = bi;
4913   merge->bj         = bj;
4914   merge->buf_ri     = buf_ri;
4915   merge->buf_rj     = buf_rj;
4916   merge->coi        = NULL;
4917   merge->coj        = NULL;
4918   merge->owners_co  = NULL;
4919 
4920   PetscCall(PetscCommDestroy(&comm));
4921 
4922   /* attach the supporting struct to B_mpi for reuse */
4923   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
4924   PetscCall(PetscContainerSetPointer(container,merge));
4925   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
4926   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
4927   PetscCall(PetscContainerDestroy(&container));
4928   *mpimat = B_mpi;
4929 
4930   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
4931   PetscFunctionReturn(0);
4932 }
4933 
4934 /*@C
4935       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4936                  matrices from each processor
4937 
4938     Collective
4939 
4940    Input Parameters:
4941 +    comm - the communicator the parallel matrix will live on
4942 .    seqmat - the input sequential matrix
4943 .    m - number of local rows (or PETSC_DECIDE)
4944 .    n - number of local columns (or PETSC_DECIDE)
4945 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4946 
4947    Output Parameter:
4948 .    mpimat - the parallel matrix generated
4949 
4950     Level: advanced
4951 
4952    Notes:
4953      The dimensions of the sequential matrix in each processor MUST be the same.
4954      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4955      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
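
     A minimal calling sketch (each rank contributes a SeqAIJ matrix Aseq of the same global size):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* later, after changing the values of Aseq (same nonzero pattern) */
     MatCreateMPIAIJSumSeqAIJ(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve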
4956 @*/
4957 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4958 {
4959   PetscMPIInt    size;
4960 
4961   PetscFunctionBegin;
4962   PetscCallMPI(MPI_Comm_size(comm,&size));
4963   if (size == 1) {
4964     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4965     if (scall == MAT_INITIAL_MATRIX) {
4966       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4967     } else {
4968       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4969     }
4970     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4971     PetscFunctionReturn(0);
4972   }
4973   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4974   if (scall == MAT_INITIAL_MATRIX) {
4975     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4976   }
4977   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4978   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /*@
4983      MatAIJGetLocalMat - Creates a SeqAIJ matrix from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4984           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4985           with MatGetSize()
4986 
4987     Not Collective
4988 
4989    Input Parameter:
4990 .    A - the matrix
4992 
4993    Output Parameter:
4994 .    A_loc - the local sequential matrix generated
4995 
4996     Level: developer
4997 
4998    Notes:
4999      In other words, it combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5000 
5001      Destroy the matrix with MatDestroy()
5002 
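     A minimal usage sketch (valid for both MATSEQAIJ and MATMPIAIJ input):

.vb
     Mat Aloc;
     MatAIJGetLocalMat(A,&Aloc);
     /* ... use the local rows of A through Aloc ... */
     MatDestroy(&Aloc);
.ve
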
5003 .seealso: `MatMPIAIJGetLocalMat()`
5004 
5005 @*/
5006 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5007 {
5008   PetscBool      mpi;
5009 
5010   PetscFunctionBegin;
5011   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5012   if (mpi) {
5013     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5014   } else {
5015     *A_loc = A;
5016     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5017   }
5018   PetscFunctionReturn(0);
5019 }
5020 
5021 /*@
5022      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5023           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5024           with MatGetSize()
5025 
5026     Not Collective
5027 
5028    Input Parameters:
5029 +    A - the matrix
5030 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5031 
5032    Output Parameter:
5033 .    A_loc - the local sequential matrix generated
5034 
5035     Level: developer
5036 
5037    Notes:
5038      In other words, it combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5039 
5040      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5041      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5042      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5043      modify the values of the returned A_loc.
5044 
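     A sketch of the initial-then-reuse pattern (the second call only refreshes the numerical values):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... the values of A change, with the same nonzero pattern ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve
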
5045 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5046 @*/
5047 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5048 {
5049   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5050   Mat_SeqAIJ        *mat,*a,*b;
5051   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5052   const PetscScalar *aa,*ba,*aav,*bav;
5053   PetscScalar       *ca,*cam;
5054   PetscMPIInt       size;
5055   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5056   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5057   PetscBool         match;
5058 
5059   PetscFunctionBegin;
5060   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
5061   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5062   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5063   if (size == 1) {
5064     if (scall == MAT_INITIAL_MATRIX) {
5065       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5066       *A_loc = mpimat->A;
5067     } else if (scall == MAT_REUSE_MATRIX) {
5068       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
5069     }
5070     PetscFunctionReturn(0);
5071   }
5072 
5073   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5074   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5075   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5076   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5077   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
5078   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5079   aa   = aav;
5080   ba   = bav;
5081   if (scall == MAT_INITIAL_MATRIX) {
5082     PetscCall(PetscMalloc1(1+am,&ci));
5083     ci[0] = 0;
5084     for (i=0; i<am; i++) {
5085       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5086     }
5087     PetscCall(PetscMalloc1(1+ci[am],&cj));
5088     PetscCall(PetscMalloc1(1+ci[am],&ca));
5089     k    = 0;
5090     for (i=0; i<am; i++) {
5091       ncols_o = bi[i+1] - bi[i];
5092       ncols_d = ai[i+1] - ai[i];
5093       /* off-diagonal portion of A */
5094       for (jo=0; jo<ncols_o; jo++) {
5095         col = cmap[*bj];
5096         if (col >= cstart) break;
5097         cj[k]   = col; bj++;
5098         ca[k++] = *ba++;
5099       }
5100       /* diagonal portion of A */
5101       for (j=0; j<ncols_d; j++) {
5102         cj[k]   = cstart + *aj++;
5103         ca[k++] = *aa++;
5104       }
5105       /* off-diagonal portion of A */
5106       for (j=jo; j<ncols_o; j++) {
5107         cj[k]   = cmap[*bj++];
5108         ca[k++] = *ba++;
5109       }
5110     }
5111     /* put together the new matrix */
5112     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5113     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5114     /* Since these are PETSc arrays, change flags to free them as necessary. */
5115     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5116     mat->free_a  = PETSC_TRUE;
5117     mat->free_ij = PETSC_TRUE;
5118     mat->nonew   = 0;
5119   } else if (scall == MAT_REUSE_MATRIX) {
5120     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5121     ci   = mat->i;
5122     cj   = mat->j;
5123     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
5124     for (i=0; i<am; i++) {
5125       /* off-diagonal portion of A */
5126       ncols_o = bi[i+1] - bi[i];
5127       for (jo=0; jo<ncols_o; jo++) {
5128         col = cmap[*bj];
5129         if (col >= cstart) break;
5130         *cam++ = *ba++; bj++;
5131       }
5132       /* diagonal portion of A */
5133       ncols_d = ai[i+1] - ai[i];
5134       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5135       /* off-diagonal portion of A */
5136       for (j=jo; j<ncols_o; j++) {
5137         *cam++ = *ba++; bj++;
5138       }
5139     }
5140     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
5141   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5142   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
5143   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
5144   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5145   PetscFunctionReturn(0);
5146 }
5147 
5148 /*@
5149      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5150           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5151 
5152     Not Collective
5153 
5154    Input Parameters:
5155 +    A - the matrix
5156 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5157 
5158    Output Parameters:
5159 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5160 -    A_loc - the local sequential matrix generated
5161 
5162     Level: developer
5163 
5164    Notes:
5165      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5166 
5167 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5168 
5169 @*/
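/*
   Illustrative usage sketch (names such as A and A_loc are placeholders, not part of the API): gather the
   local rows once with MAT_INITIAL_MATRIX, then refresh only the numerical values with MAT_REUSE_MATRIX.

     Mat A_loc = NULL;
     IS  glob  = NULL;
     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc));
     // the IS glob lists, for each local column of A_loc, the corresponding global column of A
     ...
     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_REUSE_MATRIX,NULL,&A_loc));  // values only
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&A_loc));
*/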
5170 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5171 {
5172   Mat            Ao,Ad;
5173   const PetscInt *cmap;
5174   PetscMPIInt    size;
5175   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5176 
5177   PetscFunctionBegin;
5178   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5179   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5180   if (size == 1) {
5181     if (scall == MAT_INITIAL_MATRIX) {
5182       PetscCall(PetscObjectReference((PetscObject)Ad));
5183       *A_loc = Ad;
5184     } else if (scall == MAT_REUSE_MATRIX) {
5185       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5186     }
5187     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5188     PetscFunctionReturn(0);
5189   }
5190   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5191   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5192   if (f) {
5193     PetscCall((*f)(A,scall,glob,A_loc));
5194   } else {
5195     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5196     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5197     Mat_SeqAIJ        *c;
5198     PetscInt          *ai = a->i, *aj = a->j;
5199     PetscInt          *bi = b->i, *bj = b->j;
5200     PetscInt          *ci,*cj;
5201     const PetscScalar *aa,*ba;
5202     PetscScalar       *ca;
5203     PetscInt          i,j,am,dn,on;
5204 
5205     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5206     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5207     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5208     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5209     if (scall == MAT_INITIAL_MATRIX) {
5210       PetscInt k;
5211       PetscCall(PetscMalloc1(1+am,&ci));
5212       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5213       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5214       ci[0] = 0;
5215       for (i=0,k=0; i<am; i++) {
5216         const PetscInt ncols_o = bi[i+1] - bi[i];
5217         const PetscInt ncols_d = ai[i+1] - ai[i];
5218         ci[i+1] = ci[i] + ncols_o + ncols_d;
5219         /* diagonal portion of A */
5220         for (j=0; j<ncols_d; j++,k++) {
5221           cj[k] = *aj++;
5222           ca[k] = *aa++;
5223         }
5224         /* off-diagonal portion of A */
5225         for (j=0; j<ncols_o; j++,k++) {
5226           cj[k] = dn + *bj++;
5227           ca[k] = *ba++;
5228         }
5229       }
5230       /* put together the new matrix */
5231       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5232       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5233       /* Since these are PETSc arrays, change flags to free them as necessary. */
5234       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5235       c->free_a  = PETSC_TRUE;
5236       c->free_ij = PETSC_TRUE;
5237       c->nonew   = 0;
5238       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5239     } else if (scall == MAT_REUSE_MATRIX) {
5240       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5241       for (i=0; i<am; i++) {
5242         const PetscInt ncols_d = ai[i+1] - ai[i];
5243         const PetscInt ncols_o = bi[i+1] - bi[i];
5244         /* diagonal portion of A */
5245         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5246         /* off-diagonal portion of A */
5247         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5248       }
5249       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5250     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5251     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5252     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5253     if (glob) {
5254       PetscInt cst, *gidx;
5255 
5256       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5257       PetscCall(PetscMalloc1(dn+on,&gidx));
5258       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5259       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5260       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5261     }
5262   }
5263   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5264   PetscFunctionReturn(0);
5265 }
5266 
5267 /*@C
5268      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5269 
5270     Not Collective
5271 
5272    Input Parameters:
5273 +    A - the matrix
5274 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5275 -    row, col - index sets of rows and columns to extract (or NULL)
5276 
5277    Output Parameter:
5278 .    A_loc - the local sequential matrix generated
5279 
5280     Level: developer
5281 
5282 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5283 
5284 @*/
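/*
   Illustrative usage sketch (A and A_loc are placeholders): with row == col == NULL the routine keeps all
   local rows and only the locally nonzero columns; a later call with MAT_REUSE_MATRIX refreshes the values.

     Mat A_loc = NULL;
     PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc));
     ...
     PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc));
     PetscCall(MatDestroy(&A_loc));
*/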
5285 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5286 {
5287   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5288   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5289   IS             isrowa,iscola;
5290   Mat            *aloc;
5291   PetscBool      match;
5292 
5293   PetscFunctionBegin;
5294   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5295   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5296   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5297   if (!row) {
5298     start = A->rmap->rstart; end = A->rmap->rend;
5299     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5300   } else {
5301     isrowa = *row;
5302   }
5303   if (!col) {
5304     start = A->cmap->rstart;
5305     cmap  = a->garray;
5306     nzA   = a->A->cmap->n;
5307     nzB   = a->B->cmap->n;
5308     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5309     ncols = 0;
5310     for (i=0; i<nzB; i++) {
5311       if (cmap[i] < start) idx[ncols++] = cmap[i];
5312       else break;
5313     }
5314     imark = i;
5315     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5316     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5317     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5318   } else {
5319     iscola = *col;
5320   }
5321   if (scall != MAT_INITIAL_MATRIX) {
5322     PetscCall(PetscMalloc1(1,&aloc));
5323     aloc[0] = *A_loc;
5324   }
5325   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5326   if (!col) { /* attach global id of condensed columns */
5327     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5328   }
5329   *A_loc = aloc[0];
5330   PetscCall(PetscFree(aloc));
5331   if (!row) {
5332     PetscCall(ISDestroy(&isrowa));
5333   }
5334   if (!col) {
5335     PetscCall(ISDestroy(&iscola));
5336   }
5337   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5338   PetscFunctionReturn(0);
5339 }
5340 
5341 /*
5342  * Create a sequential AIJ matrix based on row indices; the whole row is extracted once a row index is matched.
5343  * Rows can be local or remote. The routine is designed to be memory scalable, so that nothing is sized
5344  * based on a global dimension.
5345  * */
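/*
   Sketch of the per-row bookkeeping used below (illustrative values): for each local row of P the pair
   (diag nnz, offdiag nnz) is stored interleaved, two entries per row; e.g. for two rows with 3/1 and 0/2
   nonzeros

     nrcols   = {3,1, 0,2}        counts, shipped to the requesting ranks as MPIU_2INT items
     roffsets = {0,0, 3,1, 3,3}   running offsets into the diag/offdiag column and value arrays

   so a leaf (requested row) learns both how many values to pull and where they start on the root.
*/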
5346 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5347 {
5348   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5349   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5350   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5351   PetscMPIInt              owner;
5352   PetscSFNode              *iremote,*oiremote;
5353   const PetscInt           *lrowindices;
5354   PetscSF                  sf,osf;
5355   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5356   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5357   MPI_Comm                 comm;
5358   ISLocalToGlobalMapping   mapping;
5359   const PetscScalar        *pd_a,*po_a;
5360 
5361   PetscFunctionBegin;
5362   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
5363   /* plocalsize is the number of roots
5364    * nrows is the number of leaves
5365    * */
5366   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
5367   PetscCall(ISGetLocalSize(rows,&nrows));
5368   PetscCall(PetscCalloc1(nrows,&iremote));
5369   PetscCall(ISGetIndices(rows,&lrowindices));
5370   for (i=0;i<nrows;i++) {
5371     /* Find a remote index and an owner for a row
5372      * The row could be local or remote
5373      * */
5374     owner = 0;
5375     lidx  = 0;
5376     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
5377     iremote[i].index = lidx;
5378     iremote[i].rank  = owner;
5379   }
5380   /* Create SF to communicate how many nonzero columns for each row */
5381   PetscCall(PetscSFCreate(comm,&sf));
5382   /* SF will figure out the number of nonzero columns for each row, and their
5383    * offsets
5384    * */
5385   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5386   PetscCall(PetscSFSetFromOptions(sf));
5387   PetscCall(PetscSFSetUp(sf));
5388 
5389   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
5390   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
5391   PetscCall(PetscCalloc1(nrows,&pnnz));
5392   roffsets[0] = 0;
5393   roffsets[1] = 0;
5394   for (i=0;i<plocalsize;i++) {
5395     /* diag */
5396     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5397     /* off diag */
5398     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5399     /* compute offsets so that we know the relative location of each row's entries */
5400     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5401     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5402   }
5403   PetscCall(PetscCalloc1(2*nrows,&nlcols));
5404   PetscCall(PetscCalloc1(2*nrows,&loffsets));
5405   /* 'r' means root, and 'l' means leaf */
5406   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5407   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5408   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5409   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5410   PetscCall(PetscSFDestroy(&sf));
5411   PetscCall(PetscFree(roffsets));
5412   PetscCall(PetscFree(nrcols));
5413   dntotalcols = 0;
5414   ontotalcols = 0;
5415   ncol = 0;
5416   for (i=0;i<nrows;i++) {
5417     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5418     ncol = PetscMax(pnnz[i],ncol);
5419     /* diag */
5420     dntotalcols += nlcols[i*2+0];
5421     /* off diag */
5422     ontotalcols += nlcols[i*2+1];
5423   }
5424   /* We do not need to figure out the right number of columns
5425    * since all the calculations will be done by going through the raw data
5426    * */
5427   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
5428   PetscCall(MatSetUp(*P_oth));
5429   PetscCall(PetscFree(pnnz));
5430   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5431   /* diag */
5432   PetscCall(PetscCalloc1(dntotalcols,&iremote));
5433   /* off diag */
5434   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
5435   /* diag */
5436   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
5437   /* off diag */
5438   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
5439   dntotalcols = 0;
5440   ontotalcols = 0;
5441   ntotalcols  = 0;
5442   for (i=0;i<nrows;i++) {
5443     owner = 0;
5444     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
5445     /* Set iremote for diag matrix */
5446     for (j=0;j<nlcols[i*2+0];j++) {
5447       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5448       iremote[dntotalcols].rank    = owner;
5449       /* P_oth is SeqAIJ, so ilocal needs to point into its single contiguous value array */
5450       ilocal[dntotalcols++]        = ntotalcols++;
5451     }
5452     /* off diag */
5453     for (j=0;j<nlcols[i*2+1];j++) {
5454       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5455       oiremote[ontotalcols].rank    = owner;
5456       oilocal[ontotalcols++]        = ntotalcols++;
5457     }
5458   }
5459   PetscCall(ISRestoreIndices(rows,&lrowindices));
5460   PetscCall(PetscFree(loffsets));
5461   PetscCall(PetscFree(nlcols));
5462   PetscCall(PetscSFCreate(comm,&sf));
5463   /* P serves as roots and P_oth serves as leaves
5464    * Diag matrix
5465    * */
5466   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5467   PetscCall(PetscSFSetFromOptions(sf));
5468   PetscCall(PetscSFSetUp(sf));
5469 
5470   PetscCall(PetscSFCreate(comm,&osf));
5471   /* Off diag */
5472   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
5473   PetscCall(PetscSFSetFromOptions(osf));
5474   PetscCall(PetscSFSetUp(osf));
5475   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5476   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5477   /* We operate on the matrix internal data to save memory */
5478   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5479   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5480   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
5481   /* Convert to global indices for diag matrix */
5482   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5483   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5484   /* We want P_oth to store global indices */
5485   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
5486   /* Use memory scalable approach */
5487   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
5488   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
5489   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5490   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5491   /* Convert back to local indices */
5492   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5493   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5494   nout = 0;
5495   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
5496   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5497   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5498   /* Exchange values */
5499   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5500   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5501   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5502   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5503   /* Stop PETSc from shrinking memory */
5504   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5505   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
5506   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
5507   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5508   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
5509   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
5510   PetscCall(PetscSFDestroy(&sf));
5511   PetscCall(PetscSFDestroy(&osf));
5512   PetscFunctionReturn(0);
5513 }
5514 
5515 /*
5516  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5517  * This supports MPIAIJ and MAIJ
5518  * */
5519 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5520 {
5521   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5522   Mat_SeqAIJ            *p_oth;
5523   IS                    rows,map;
5524   PetscHMapI            hamp;
5525   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5526   MPI_Comm              comm;
5527   PetscSF               sf,osf;
5528   PetscBool             has;
5529 
5530   PetscFunctionBegin;
5531   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5532   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
5533   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5534    *  and then create a submatrix (that often is an overlapping matrix)
5535    * */
5536   if (reuse == MAT_INITIAL_MATRIX) {
5537     /* Use a hash table to figure out unique keys */
5538     PetscCall(PetscHMapICreate(&hamp));
5539     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
5540     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5541     count = 0;
5542     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5543     for (i=0;i<a->B->cmap->n;i++) {
5544       key  = a->garray[i]/dof;
5545       PetscCall(PetscHMapIHas(hamp,key,&has));
5546       if (!has) {
5547         mapping[i] = count;
5548         PetscCall(PetscHMapISet(hamp,key,count++));
5549       } else {
5550         /* Current 'i' has the same key as the previous step */
5551         mapping[i] = count-1;
5552       }
5553     }
5554     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
5555     PetscCall(PetscHMapIGetSize(hamp,&htsize));
5556     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5557     PetscCall(PetscCalloc1(htsize,&rowindices));
5558     off = 0;
5559     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
5560     PetscCall(PetscHMapIDestroy(&hamp));
5561     PetscCall(PetscSortInt(htsize,rowindices));
5562     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
5563     /* In case the matrix was already created but the user wants to recreate it */
5564     PetscCall(MatDestroy(P_oth));
5565     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
5566     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
5567     PetscCall(ISDestroy(&map));
5568     PetscCall(ISDestroy(&rows));
5569   } else if (reuse == MAT_REUSE_MATRIX) {
5570     /* If the matrix was already created, we simply update values using the SF objects
5571      * that were attached to the matrix earlier.
5572      */
5573     const PetscScalar *pd_a,*po_a;
5574 
5575     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
5576     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
5577     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5578     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5579     /* Update values in place */
5580     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5581     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5582     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5583     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5584     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5585     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5586     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5587     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5588   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5589   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
5590   PetscFunctionReturn(0);
5591 }
5592 
5593 /*@C
5594   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5595 
5596   Collective on Mat
5597 
5598   Input Parameters:
5599 + A - the first matrix in mpiaij format
5600 . B - the second matrix in mpiaij format
5601 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5602 
5603   Output Parameters:
5604 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5605 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5606 - B_seq - the sequential matrix generated
5607 
5608   Level: developer
5609 
5610 @*/
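/*
   Illustrative usage sketch (A, B, B_seq are placeholders): the index sets created on the first call are
   returned to the caller and must be passed back (and eventually destroyed) when reusing the matrix.

     IS  rowb = NULL, colb = NULL;
     Mat B_seq = NULL;
     PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq));
     ...
     PetscCall(MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq));
     PetscCall(ISDestroy(&rowb));
     PetscCall(ISDestroy(&colb));
     PetscCall(MatDestroy(&B_seq));
*/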
5611 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5612 {
5613   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5614   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5615   IS             isrowb,iscolb;
5616   Mat            *bseq=NULL;
5617 
5618   PetscFunctionBegin;
5619   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5620     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5621   }
5622   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5623 
5624   if (scall == MAT_INITIAL_MATRIX) {
5625     start = A->cmap->rstart;
5626     cmap  = a->garray;
5627     nzA   = a->A->cmap->n;
5628     nzB   = a->B->cmap->n;
5629     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5630     ncols = 0;
5631     for (i=0; i<nzB; i++) {  /* row < local row index */
5632       if (cmap[i] < start) idx[ncols++] = cmap[i];
5633       else break;
5634     }
5635     imark = i;
5636     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5637     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5638     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5639     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5640   } else {
5641     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5642     isrowb  = *rowb; iscolb = *colb;
5643     PetscCall(PetscMalloc1(1,&bseq));
5644     bseq[0] = *B_seq;
5645   }
5646   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5647   *B_seq = bseq[0];
5648   PetscCall(PetscFree(bseq));
5649   if (!rowb) {
5650     PetscCall(ISDestroy(&isrowb));
5651   } else {
5652     *rowb = isrowb;
5653   }
5654   if (!colb) {
5655     PetscCall(ISDestroy(&iscolb));
5656   } else {
5657     *colb = iscolb;
5658   }
5659   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5660   PetscFunctionReturn(0);
5661 }
5662 
5663 /*
5664     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5665     of the OFF-DIAGONAL portion of local A
5666 
5667     Collective on Mat
5668 
5669    Input Parameters:
5670 +    A,B - the matrices in mpiaij format
5671 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5672 
5673    Output Parameters:
5674 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5675 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5676 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5677 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5678 
5679     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5680      for this matrix. This is not desirable..
5681      for this matrix. This is not desirable.
5682     Level: developer
5683 
5684 */
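/*
   Illustrative usage sketch (A, B, B_oth are placeholders): the i/j communication pattern and the send
   buffer created with MAT_INITIAL_MATRIX are handed back through startsj_s/startsj_r/bufa so that later
   calls with MAT_REUSE_MATRIX only exchange numerical values; the caller eventually frees those arrays
   (they were obtained with PetscMalloc2()/PetscMalloc1()) and destroys B_oth.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth));
     ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth));
*/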
5685 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5686 {
5687   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5688   Mat_SeqAIJ             *b_oth;
5689   VecScatter             ctx;
5690   MPI_Comm               comm;
5691   const PetscMPIInt      *rprocs,*sprocs;
5692   const PetscInt         *srow,*rstarts,*sstarts;
5693   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5694   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5695   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5696   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5697   PetscMPIInt            size,tag,rank,nreqs;
5698 
5699   PetscFunctionBegin;
5700   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5701   PetscCallMPI(MPI_Comm_size(comm,&size));
5702 
5703   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5704     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5705   }
5706   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5707   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5708 
5709   if (size == 1) {
5710     startsj_s = NULL;
5711     bufa_ptr  = NULL;
5712     *B_oth    = NULL;
5713     PetscFunctionReturn(0);
5714   }
5715 
5716   ctx = a->Mvctx;
5717   tag = ((PetscObject)ctx)->tag;
5718 
5719   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5720   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5721   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5722   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5723   PetscCall(PetscMalloc1(nreqs,&reqs));
5724   rwaits = reqs;
5725   swaits = reqs + nrecvs;
5726 
5727   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5728   if (scall == MAT_INITIAL_MATRIX) {
5729     /* i-array */
5730     /*---------*/
5731     /*  post receives */
5732     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5733     for (i=0; i<nrecvs; i++) {
5734       rowlen = rvalues + rstarts[i]*rbs;
5735       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5736       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5737     }
5738 
5739     /* pack the outgoing message */
5740     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5741 
5742     sstartsj[0] = 0;
5743     rstartsj[0] = 0;
5744     len         = 0; /* total length of j or a array to be sent */
5745     if (nsends) {
5746       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5747       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5748     }
5749     for (i=0; i<nsends; i++) {
5750       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5751       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5752       for (j=0; j<nrows; j++) {
5753         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5754         for (l=0; l<sbs; l++) {
5755           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5756 
5757           rowlen[j*sbs+l] = ncols;
5758 
5759           len += ncols;
5760           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5761         }
5762         k++;
5763       }
5764       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5765 
5766       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5767     }
5768     /* recvs and sends of i-array are completed */
5769     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5770     PetscCall(PetscFree(svalues));
5771 
5772     /* allocate buffers for sending j and a arrays */
5773     PetscCall(PetscMalloc1(len+1,&bufj));
5774     PetscCall(PetscMalloc1(len+1,&bufa));
5775 
5776     /* create i-array of B_oth */
5777     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5778 
5779     b_othi[0] = 0;
5780     len       = 0; /* total length of j or a array to be received */
5781     k         = 0;
5782     for (i=0; i<nrecvs; i++) {
5783       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5784       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5785       for (j=0; j<nrows; j++) {
5786         b_othi[k+1] = b_othi[k] + rowlen[j];
5787         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5788         k++;
5789       }
5790       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5791     }
5792     PetscCall(PetscFree(rvalues));
5793 
5794     /* allocate space for j and a arrays of B_oth */
5795     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5796     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5797 
5798     /* j-array */
5799     /*---------*/
5800     /*  post receives of j-array */
5801     for (i=0; i<nrecvs; i++) {
5802       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5803       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5804     }
5805 
5806     /* pack the outgoing message j-array */
5807     if (nsends) k = sstarts[0];
5808     for (i=0; i<nsends; i++) {
5809       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5810       bufJ  = bufj+sstartsj[i];
5811       for (j=0; j<nrows; j++) {
5812         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5813         for (ll=0; ll<sbs; ll++) {
5814           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5815           for (l=0; l<ncols; l++) {
5816             *bufJ++ = cols[l];
5817           }
5818           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5819         }
5820       }
5821       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5822     }
5823 
5824     /* recvs and sends of j-array are completed */
5825     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5826   } else if (scall == MAT_REUSE_MATRIX) {
5827     sstartsj = *startsj_s;
5828     rstartsj = *startsj_r;
5829     bufa     = *bufa_ptr;
5830     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5831     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5832   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5833 
5834   /* a-array */
5835   /*---------*/
5836   /*  post receives of a-array */
5837   for (i=0; i<nrecvs; i++) {
5838     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5839     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5840   }
5841 
5842   /* pack the outgoing message a-array */
5843   if (nsends) k = sstarts[0];
5844   for (i=0; i<nsends; i++) {
5845     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5846     bufA  = bufa+sstartsj[i];
5847     for (j=0; j<nrows; j++) {
5848       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5849       for (ll=0; ll<sbs; ll++) {
5850         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5851         for (l=0; l<ncols; l++) {
5852           *bufA++ = vals[l];
5853         }
5854         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5855       }
5856     }
5857     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5858   }
5859   /* recvs and sends of a-array are completed */
5860   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5861   PetscCall(PetscFree(reqs));
5862 
5863   if (scall == MAT_INITIAL_MATRIX) {
5864     /* put together the new matrix */
5865     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5866 
5867     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5868     /* Since these are PETSc arrays, change flags to free them as necessary. */
5869     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5870     b_oth->free_a  = PETSC_TRUE;
5871     b_oth->free_ij = PETSC_TRUE;
5872     b_oth->nonew   = 0;
5873 
5874     PetscCall(PetscFree(bufj));
5875     if (!startsj_s || !bufa_ptr) {
5876       PetscCall(PetscFree2(sstartsj,rstartsj));
5877       PetscCall(PetscFree(bufa_ptr));
5878     } else {
5879       *startsj_s = sstartsj;
5880       *startsj_r = rstartsj;
5881       *bufa_ptr  = bufa;
5882     }
5883   } else if (scall == MAT_REUSE_MATRIX) {
5884     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5885   }
5886 
5887   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5888   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5889   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5890   PetscFunctionReturn(0);
5891 }
5892 
5893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5896 #if defined(PETSC_HAVE_MKL_SPARSE)
5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5898 #endif
5899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5901 #if defined(PETSC_HAVE_ELEMENTAL)
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5903 #endif
5904 #if defined(PETSC_HAVE_SCALAPACK)
5905 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5906 #endif
5907 #if defined(PETSC_HAVE_HYPRE)
5908 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5909 #endif
5910 #if defined(PETSC_HAVE_CUDA)
5911 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5912 #endif
5913 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5915 #endif
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5917 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5918 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5919 
5920 /*
5921     Computes (B'*A')' since computing A*B directly is untenable
5922 
5923                n                       p                          p
5924         [             ]       [             ]         [                 ]
5925       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5926         [             ]       [             ]         [                 ]
5927 
5928 */
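/* Equivalently, C = A*B = (B^T * A^T)^T: the routine below forms A^T and B^T, multiplies them
   (as B^T*A^T), and transposes the product back into C. */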
5929 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5930 {
5931   Mat            At,Bt,Ct;
5932 
5933   PetscFunctionBegin;
5934   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5935   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5936   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5937   PetscCall(MatDestroy(&At));
5938   PetscCall(MatDestroy(&Bt));
5939   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5940   PetscCall(MatDestroy(&Ct));
5941   PetscFunctionReturn(0);
5942 }
5943 
5944 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5945 {
5946   PetscBool      cisdense;
5947 
5948   PetscFunctionBegin;
5949   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5950   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5951   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5952   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5953   if (!cisdense) {
5954     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5955   }
5956   PetscCall(MatSetUp(C));
5957 
5958   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5959   PetscFunctionReturn(0);
5960 }
5961 
5962 /* ----------------------------------------------------------------*/
5963 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5964 {
5965   Mat_Product *product = C->product;
5966   Mat         A = product->A,B=product->B;
5967 
5968   PetscFunctionBegin;
5969   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5970     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5971 
5972   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5973   C->ops->productsymbolic = MatProductSymbolic_AB;
5974   PetscFunctionReturn(0);
5975 }
5976 
5977 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5978 {
5979   Mat_Product    *product = C->product;
5980 
5981   PetscFunctionBegin;
5982   if (product->type == MATPRODUCT_AB) {
5983     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5984   }
5985   PetscFunctionReturn(0);
5986 }
5987 
5988 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5989 
5990   Input Parameters:
5991 
5992     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5993     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5994 
5995     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5996 
5997     For Set1, j1[] contains column indices of the nonzeros.
5998     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5999     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6000     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6001 
6002     Similar for Set2.
6003 
6004     This routine merges the two sets of nonzeros row by row and removes repeats.
6005 
6006   Output Parameters: (memory is allocated by the caller)
6007 
6008     i[],j[]: the CSR of the merged matrix, which has m rows.
6009     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6010     imap2[]: similar to imap1[], but for Set2.
6011     Note we order nonzeros row-by-row and from left to right.
6012 */
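/*
   Worked example (one local row, m = 1):
     Set1: j1 = {2,2,5}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}   (col 2 twice, col 5 once)
     Set2: j2 = {2,7},   rowBegin2 = {0}, rowEnd2 = {2}, jmap2 = {0,1,2}   (col 2 once, col 7 once)
   The merged CSR and the maps produced are
     i = {0,3}, j = {2,5,7}, imap1 = {0,1}, imap2 = {0,2}
   i.e. Set1's unique nonzeros land at positions 0 and 1 of the merged row, Set2's at positions 0 and 2.
*/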
6013 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6014   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6015   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6016 {
6017   PetscInt       r,m; /* Row index of mat */
6018   PetscCount     t,t1,t2,b1,e1,b2,e2;
6019 
6020   PetscFunctionBegin;
6021   PetscCall(MatGetLocalSize(mat,&m,NULL));
6022   t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
6023   i[0] = 0;
6024   for (r=0; r<m; r++) { /* Do row by row merging */
6025     b1   = rowBegin1[r];
6026     e1   = rowEnd1[r];
6027     b2   = rowBegin2[r];
6028     e2   = rowEnd2[r];
6029     while (b1 < e1 && b2 < e2) {
6030       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6031         j[t]      = j1[b1];
6032         imap1[t1] = t;
6033         imap2[t2] = t;
6034         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6035         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6036         t1++; t2++; t++;
6037       } else if (j1[b1] < j2[b2]) {
6038         j[t]      = j1[b1];
6039         imap1[t1] = t;
6040         b1       += jmap1[t1+1] - jmap1[t1];
6041         t1++; t++;
6042       } else {
6043         j[t]      = j2[b2];
6044         imap2[t2] = t;
6045         b2       += jmap2[t2+1] - jmap2[t2];
6046         t2++; t++;
6047       }
6048     }
6049     /* Merge the remaining in either j1[] or j2[] */
6050     while (b1 < e1) {
6051       j[t]      = j1[b1];
6052       imap1[t1] = t;
6053       b1       += jmap1[t1+1] - jmap1[t1];
6054       t1++; t++;
6055     }
6056     while (b2 < e2) {
6057       j[t]      = j2[b2];
6058       imap2[t2] = t;
6059       b2       += jmap2[t2+1] - jmap2[t2];
6060       t2++; t++;
6061     }
6062     i[r+1] = t;
6063   }
6064   PetscFunctionReturn(0);
6065 }
6066 
6067 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6068 
6069   Input Parameters:
6070     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6071     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6072       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6073 
6074       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6075       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6076 
6077   Output Parameters:
6078     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6079     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6080       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6081       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6082 
6083     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6084       Atot: number of entries belonging to the diagonal block.
6085       Annz: number of unique nonzeros belonging to the diagonal block.
6086       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6087         repeats (i.e., same 'i,j' pair).
6088       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6089         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6093 
6094     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6095 
6096     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6097 */
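/*
   Worked example: suppose mat owns rows [0,2) and columns [0,3), and the (already row-sorted) input is
     i = {0,0,1,1,1}, j = {4,1,0,5,2}, perm = {0,1,2,3,4}
   After the call, j[] is sorted within each row with diagonal-block columns first,
     j = {1,4, 0,2,5}, rowBegin = {0,2}, rowMid = {1,4}, rowEnd = {2,5}
   and the diagonal/off-diagonal outputs are
     Atot = 3, Annz = 3, Ajmap = {0,1,2,3}, Aperm = {1,2,4}
     Btot = 2, Bnnz = 2, Bjmap = {0,1,2},   Bperm = {0,3}
   (no entry is repeated here, so every jmap increment is 1).
*/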
6098 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6099   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6100   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6101   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6102 {
6103   PetscInt          cstart,cend,rstart,rend,row,col;
6104   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6105   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6106   PetscCount        k,m,p,q,r,s,mid;
6107   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6108 
6109   PetscFunctionBegin;
6110   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6111   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6112   m    = rend - rstart;
6113 
6114   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6115 
6116   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6117      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6118   */
6119   while (k<n) {
6120     row = i[k];
6121     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6122     for (s=k; s<n; s++) if (i[s] != row) break;
6123     for (p=k; p<s; p++) {
6124       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6125       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6126     }
6127     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6128     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6129     rowBegin[row-rstart] = k;
6130     rowMid[row-rstart]   = mid;
6131     rowEnd[row-rstart]   = s;
6132 
6133     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6134     Atot += mid - k;
6135     Btot += s - mid;
6136 
6137     /* Count unique nonzeros of this diag/offdiag row */
6138     for (p=k; p<mid;) {
6139       col = j[p];
6140       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6141       Annz++;
6142     }
6143 
6144     for (p=mid; p<s;) {
6145       col = j[p];
6146       do {p++;} while (p<s && j[p] == col);
6147       Bnnz++;
6148     }
6149     k = s;
6150   }
6151 
6152   /* Allocation according to Atot, Btot, Annz, Bnnz */
6153   PetscCall(PetscMalloc1(Atot,&Aperm));
6154   PetscCall(PetscMalloc1(Btot,&Bperm));
6155   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6156   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6157 
6158   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6159   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6160   for (r=0; r<m; r++) {
6161     k     = rowBegin[r];
6162     mid   = rowMid[r];
6163     s     = rowEnd[r];
6164     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6165     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6166     Atot += mid - k;
6167     Btot += s - mid;
6168 
6169     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6170     for (p=k; p<mid;) {
6171       col = j[p];
6172       q   = p;
6173       do {p++;} while (p<mid && j[p] == col);
6174       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6175       Annz++;
6176     }
6177 
6178     for (p=mid; p<s;) {
6179       col = j[p];
6180       q   = p;
6181       do {p++;} while (p<s && j[p] == col);
6182       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6183       Bnnz++;
6184     }
6185   }
6186   /* Output */
6187   *Aperm_ = Aperm;
6188   *Annz_  = Annz;
6189   *Atot_  = Atot;
6190   *Ajmap_ = Ajmap;
6191   *Bperm_ = Bperm;
6192   *Bnnz_  = Bnnz;
6193   *Btot_  = Btot;
6194   *Bjmap_ = Bjmap;
6195   PetscFunctionReturn(0);
6196 }
6197 
6198 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6199 
6200   Input Parameters:
6201     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6202     nnz:  number of unique nonzeros in the merged matrix
6203     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6204     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6205     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6206   Output Parameter: (memory is allocated by the caller)
6207     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6208 
6209   Example:
6210     nnz1 = 4
6211     nnz  = 6
6212     imap = [1,3,4,5]
6213     jmap = [0,3,5,6,7]
6214    then,
6215     jmap_new = [0,0,3,3,5,6,7]
6216 */
6217 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6218 {
6219   PetscCount k,p;
6220 
6221   PetscFunctionBegin;
6222   jmap_new[0] = 0;
6223   p = nnz; /* p loops over jmap_new[] backwards */
6224   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6225     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6226   }
6227   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6228   PetscFunctionReturn(0);
6229 }
6230 
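/*
   Illustrative usage sketch of the COO assembly interface this routine implements (A, coo_i, coo_j, v are
   placeholders): preallocate once from the (i,j) pattern, then set or update the values any number of times.

     PetscInt    coo_i[] = {0,0,1};
     PetscInt    coo_j[] = {0,2,1};
     PetscScalar v[]     = {1.0,2.0,3.0};
     PetscCall(MatSetPreallocationCOO(A,3,coo_i,coo_j));
     PetscCall(MatSetValuesCOO(A,v,ADD_VALUES));   // with ADD_VALUES, repeated (i,j) pairs are accumulated

   Negative row or column indices mark entries to be ignored, as handled below.
*/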
6231 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6232 {
6233   MPI_Comm                  comm;
6234   PetscMPIInt               rank,size;
6235   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6236   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6237   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6238 
6239   PetscFunctionBegin;
6240   PetscCall(PetscFree(mpiaij->garray));
6241   PetscCall(VecDestroy(&mpiaij->lvec));
6242 #if defined(PETSC_USE_CTABLE)
6243   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6244 #else
6245   PetscCall(PetscFree(mpiaij->colmap));
6246 #endif
6247   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6248   mat->assembled = PETSC_FALSE;
6249   mat->was_assembled = PETSC_FALSE;
6250   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6251 
6252   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6253   PetscCallMPI(MPI_Comm_size(comm,&size));
6254   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6255   PetscCall(PetscLayoutSetUp(mat->rmap));
6256   PetscCall(PetscLayoutSetUp(mat->cmap));
6257   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6258   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6259   PetscCall(MatGetLocalSize(mat,&m,&n));
6260   PetscCall(MatGetSize(mat,&M,&N));
6261 
6262   /* ---------------------------------------------------------------------------*/
6263   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6264   /* entries come first, then local rows, then remote rows.                     */
6265   /* ---------------------------------------------------------------------------*/
6266   PetscCount n1 = coo_n,*perm1;
6267   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6268   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6269   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6270   PetscCall(PetscArraycpy(j1,coo_j,n1));
6271   for (k=0; k<n1; k++) perm1[k] = k;
6272 
6273   /* Manipulate indices so that entries with negative row or col indices will have smallest
6274      row indices, local entries will have greater but negative row indices, and remote entries
6275      will have positive row indices.
6276   */
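  /* For example, with rstart=100, rend=200 and a 32-bit PetscInt (PETSC_MAX_INT = 2^31-1):
       an ignored entry (negative i or j)  -> i1[k] = PETSC_MIN_INT
       a local entry with i1[k] = 150      -> i1[k] = 150 - PETSC_MAX_INT   (negative, but > PETSC_MIN_INT)
       a remote entry with i1[k] = 250     -> unchanged (positive)
     so a single sort by i1[] yields the order: ignored, local, remote. */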
6277   for (k=0; k<n1; k++) {
6278     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6279     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6280     else { PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6281            if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; } /* Ignore offproc entries as if they had negative indices */
6282   }
6283 
6284   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6285   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6286   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6287   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6288   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6289 
6290   /* ---------------------------------------------------------------------------*/
6291   /*           Split local rows into diag/offdiag portions                      */
6292   /* ---------------------------------------------------------------------------*/
6293   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6294   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6295   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6296 
6297   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6298   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6299   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6300 
6301   /* ---------------------------------------------------------------------------*/
6302   /*           Send remote rows to their owner                                  */
6303   /* ---------------------------------------------------------------------------*/
6304   /* Find which rows should be sent to which remote ranks*/
6305   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6306   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6307   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6308   const PetscInt *ranges;
6309   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6310 
6311   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6312   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6313   for (k=rem; k<n1;) {
6314     PetscMPIInt  owner;
6315     PetscInt     firstRow,lastRow;
6316 
6317     /* Locate a row range */
6318     firstRow = i1[k]; /* first row of this owner */
6319     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6320     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6321 
6322     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6323     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6324 
6325     /* All entries in [k,p) belong to this remote owner */
6326     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6327       PetscMPIInt *sendto2;
6328       PetscInt    *nentries2;
6329       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6330 
6331       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6332       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6333       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend));
6334       PetscCall(PetscFree2(sendto,nentries));
6335       sendto      = sendto2;
6336       nentries    = nentries2;
6337       maxNsend    = maxNsend2;
6338     }
6339     sendto[nsend]   = owner;
6340     nentries[nsend] = p - k;
6341     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6342     nsend++;
6343     k = p;
6344   }
6345 
6346   /* Build 1st SF to know offsets on remote to send data */
6347   PetscSF     sf1;
6348   PetscInt    nroots = 1,nroots2 = 0;
6349   PetscInt    nleaves = nsend,nleaves2 = 0;
6350   PetscInt    *offsets;
6351   PetscSFNode *iremote;
6352 
6353   PetscCall(PetscSFCreate(comm,&sf1));
6354   PetscCall(PetscMalloc1(nsend,&iremote));
6355   PetscCall(PetscMalloc1(nsend,&offsets));
6356   for (k=0; k<nsend; k++) {
6357     iremote[k].rank  = sendto[k];
6358     iremote[k].index = 0;
6359     nleaves2        += nentries[k];
6360     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6361   }
6362   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6363   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6364   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */
6365   PetscCall(PetscSFDestroy(&sf1));
6366   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6367 
6368   /* Build 2nd SF to send remote COOs to their owner */
6369   PetscSF sf2;
6370   nroots  = nroots2;
6371   nleaves = nleaves2;
6372   PetscCall(PetscSFCreate(comm,&sf2));
6373   PetscCall(PetscSFSetFromOptions(sf2));
6374   PetscCall(PetscMalloc1(nleaves,&iremote));
6375   p       = 0;
6376   for (k=0; k<nsend; k++) {
6377     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6378     for (q=0; q<nentries[k]; q++,p++) {
6379       iremote[p].rank  = sendto[k];
6380       iremote[p].index = offsets[k] + q;
6381     }
6382   }
6383   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6384 
6385   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6386   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6387 
6388   /* Send the remote COOs to their owner */
6389   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6390   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6391   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6392   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6393   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6394   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6395   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6396 
6397   PetscCall(PetscFree(offsets));
6398   PetscCall(PetscFree2(sendto,nentries));
6399 
6400   /* ---------------------------------------------------------------*/
6401   /* Sort received COOs by row along with the permutation array     */
6402   /* ---------------------------------------------------------------*/
6403   for (k=0; k<n2; k++) perm2[k] = k;
6404   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6405 
6406   /* ---------------------------------------------------------------*/
6407   /* Split received COOs into diag/offdiag portions                 */
6408   /* ---------------------------------------------------------------*/
6409   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6410   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6411   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6412 
6413   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6414   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6415 
6416   /* --------------------------------------------------------------------------*/
6417   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6418   /* --------------------------------------------------------------------------*/
6419   PetscInt   *Ai,*Bi;
6420   PetscInt   *Aj,*Bj;
6421 
6422   PetscCall(PetscMalloc1(m+1,&Ai));
6423   PetscCall(PetscMalloc1(m+1,&Bi));
6424   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6425   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6426 
6427   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6428   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6429   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6430   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6431   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6432 
6433   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6434   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6435 
6436   /* --------------------------------------------------------------------------*/
6437   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since  */
6438   /* most nonzeros in A/B are expected to have local contributing entries         */
6439   /* --------------------------------------------------------------------------*/
6440   PetscInt Annz = Ai[m];
6441   PetscInt Bnnz = Bi[m];
6442   PetscCount *Ajmap1_new,*Bjmap1_new;
6443 
6444   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6445   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6446 
6447   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6448   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6449 
6450   PetscCall(PetscFree(Aimap1));
6451   PetscCall(PetscFree(Ajmap1));
6452   PetscCall(PetscFree(Bimap1));
6453   PetscCall(PetscFree(Bjmap1));
6454   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6455   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6456   PetscCall(PetscFree3(i1,j1,perm1));
6457   PetscCall(PetscFree3(i2,j2,perm2));
6458 
6459   Ajmap1 = Ajmap1_new;
6460   Bjmap1 = Bjmap1_new;
6461 
6462   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6463   if (Annz < Annz1 + Annz2) {
6464     PetscInt *Aj_new;
6465     PetscCall(PetscMalloc1(Annz,&Aj_new));
6466     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6467     PetscCall(PetscFree(Aj));
6468     Aj   = Aj_new;
6469   }
6470 
6471   if (Bnnz < Bnnz1 + Bnnz2) {
6472     PetscInt *Bj_new;
6473     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6474     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6475     PetscCall(PetscFree(Bj));
6476     Bj   = Bj_new;
6477   }
6478 
6479   /* --------------------------------------------------------------------------------*/
6480   /* Create new submatrices for on-process and off-process coupling                  */
6481   /* --------------------------------------------------------------------------------*/
6482   PetscScalar   *Aa,*Ba;
6483   MatType       rtype;
6484   Mat_SeqAIJ    *a,*b;
6485   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6486   PetscCall(PetscCalloc1(Bnnz,&Ba));
6487   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6488   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6489   PetscCall(MatDestroy(&mpiaij->A));
6490   PetscCall(MatDestroy(&mpiaij->B));
6491   PetscCall(MatGetRootType_Private(mat,&rtype));
6492   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6493   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6494   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6495 
6496   a = (Mat_SeqAIJ*)mpiaij->A->data;
6497   b = (Mat_SeqAIJ*)mpiaij->B->data;
6498   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let mpiaij->A/B own (and later free) Ai,Aj,Aa,Bi,Bj,Ba */
6499   a->free_a       = b->free_a       = PETSC_TRUE;
6500   a->free_ij      = b->free_ij      = PETSC_TRUE;
6501 
6502   /* conversion must happen AFTER multiply setup */
6503   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6504   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6505   PetscCall(VecDestroy(&mpiaij->lvec));
6506   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6507   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6508 
6509   mpiaij->coo_n   = coo_n;
6510   mpiaij->coo_sf  = sf2;
6511   mpiaij->sendlen = nleaves;
6512   mpiaij->recvlen = nroots;
6513 
6514   mpiaij->Annz    = Annz;
6515   mpiaij->Bnnz    = Bnnz;
6516 
6517   mpiaij->Annz2   = Annz2;
6518   mpiaij->Bnnz2   = Bnnz2;
6519 
6520   mpiaij->Atot1   = Atot1;
6521   mpiaij->Atot2   = Atot2;
6522   mpiaij->Btot1   = Btot1;
6523   mpiaij->Btot2   = Btot2;
6524 
6525   mpiaij->Ajmap1  = Ajmap1;
6526   mpiaij->Aperm1  = Aperm1;
6527 
6528   mpiaij->Bjmap1  = Bjmap1;
6529   mpiaij->Bperm1  = Bperm1;
6530 
6531   mpiaij->Aimap2  = Aimap2;
6532   mpiaij->Ajmap2  = Ajmap2;
6533   mpiaij->Aperm2  = Aperm2;
6534 
6535   mpiaij->Bimap2  = Bimap2;
6536   mpiaij->Bjmap2  = Bjmap2;
6537   mpiaij->Bperm2  = Bperm2;
6538 
6539   mpiaij->Cperm1  = Cperm1;
6540 
6541   /* Allocate the send/receive buffers at preallocation time. If not used, they have zero cost on the host */
6542   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6543   PetscFunctionReturn(0);
6544 }
6545 
6546 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6547 {
6548   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6549   Mat                  A = mpiaij->A,B = mpiaij->B;
6550   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6551   PetscScalar          *Aa,*Ba;
6552   PetscScalar          *sendbuf = mpiaij->sendbuf;
6553   PetscScalar          *recvbuf = mpiaij->recvbuf;
6554   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6555   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6556   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6557   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6558 
6559   PetscFunctionBegin;
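  /* Index-map notation used below (built by MatSetPreallocationCOO_MPIAIJ): for nonzero i of the diagonal
     block A, Ajmap1[i]..Ajmap1[i+1] delimits the locally provided COO entries contributing to it, reached
     through the permutation Aperm1[]; the *2 arrays play the same role for entries received from other
     ranks, with Aimap2[]/Bimap2[] giving the target nonzero; Cperm1[] selects the local COO values that
     must be shipped to other ranks. */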
6560   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6561   PetscCall(MatSeqAIJGetArray(B,&Ba));
6562 
6563   /* Pack entries to be sent to remote */
6564   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6565 
6566   /* Send remote entries to their owner and overlap the communication with local computation */
6567   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6568   /* Add local entries to A and B */
6569   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zeroed or added with a value (i.e., initialized) */
6570     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6571     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6572     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6573   }
6574   for (PetscCount i=0; i<Bnnz; i++) {
6575     PetscScalar sum = 0.0;
6576     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6577     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6578   }
6579   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6580 
6581   /* Add received remote entries to A and B */
6582   for (PetscCount i=0; i<Annz2; i++) {
6583     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6584   }
6585   for (PetscCount i=0; i<Bnnz2; i++) {
6586     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6587   }
6588   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6589   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6590   PetscFunctionReturn(0);
6591 }
6592 
6593 /* ----------------------------------------------------------------*/
6594 
6595 /*MC
6596    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6597 
6598    Options Database Keys:
6599 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6600 
6601    Level: beginner
6602 
6603    Notes:
6604     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6605     in this case the values associated with the rows and columns one passes in are set to zero
6606     in the matrix.
6607 
6608     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6609     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
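
   Example Usage:
   A minimal sketch that assembles a diagonal matrix; comm, the local size m, and the preallocation of
   5 nonzeros per row in both the diagonal and off-diagonal blocks are placeholders, and error checking
   with PetscCall() is omitted.
.vb
   Mat      A;
   PetscInt rstart, rend, row;

   MatCreate(comm, &A);
   MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATMPIAIJ);
   MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
   MatGetOwnershipRange(A, &rstart, &rend);
   for (row = rstart; row < rend; row++) {
     PetscScalar one = 1.0;
     MatSetValues(A, 1, &row, 1, &row, &one, INSERT_VALUES);
   }
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve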
6610 
6611 .seealso: `MatCreateAIJ()`
6612 M*/
6613 
6614 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6615 {
6616   Mat_MPIAIJ     *b;
6617   PetscMPIInt    size;
6618 
6619   PetscFunctionBegin;
6620   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6621 
6622   PetscCall(PetscNewLog(B,&b));
6623   B->data       = (void*)b;
6624   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6625   B->assembled  = PETSC_FALSE;
6626   B->insertmode = NOT_SET_VALUES;
6627   b->size       = size;
6628 
6629   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6630 
6631   /* build cache for off array entries formed */
6632   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6633 
6634   b->donotstash  = PETSC_FALSE;
6635   b->colmap      = NULL;
6636   b->garray      = NULL;
6637   b->roworiented = PETSC_TRUE;
6638 
6639   /* stuff used for matrix vector multiply */
6640   b->lvec  = NULL;
6641   b->Mvctx = NULL;
6642 
6643   /* stuff for MatGetRow() */
6644   b->rowindices   = NULL;
6645   b->rowvalues    = NULL;
6646   b->getrowactive = PETSC_FALSE;
6647 
6648   /* flexible pointer used in CUSPARSE classes */
6649   b->spptr = NULL;
6650 
6651   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6652   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6653   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6654   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6655   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6656   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6657   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6658   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6659   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6660   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6661 #if defined(PETSC_HAVE_CUDA)
6662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6663 #endif
6664 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6666 #endif
6667 #if defined(PETSC_HAVE_MKL_SPARSE)
6668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6669 #endif
6670   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6672   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6673   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6674 #if defined(PETSC_HAVE_ELEMENTAL)
6675   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6676 #endif
6677 #if defined(PETSC_HAVE_SCALAPACK)
6678   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6679 #endif
6680   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6681   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6682 #if defined(PETSC_HAVE_HYPRE)
6683   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6684   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6685 #endif
6686   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6687   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6688   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6689   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6690   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6691   PetscFunctionReturn(0);
6692 }
6693 
6694 /*@C
6695      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6696          and "off-diagonal" part of the matrix in CSR format.
6697 
6698    Collective
6699 
6700    Input Parameters:
6701 +  comm - MPI communicator
6702 .  m - number of local rows (Cannot be PETSC_DECIDE)
6703 .  n - This value should be the same as the local size used in creating the
6704        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6705        calculated if N is given). For square matrices n is almost always m.
6706 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6707 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6708 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6709 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6710 .   a - matrix values
6711 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6712 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6713 -   oa - matrix values
6714 
6715    Output Parameter:
6716 .   mat - the matrix
6717 
6718    Level: advanced
6719 
6720    Notes:
6721        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6722        must free the arrays once the matrix has been destroyed and not before.
6723 
6724        The i and j indices are 0 based
6725 
6726        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6727 
6728        This sets local rows and cannot be used to set off-processor values.
6729 
6730        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6731        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6732        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6733        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6734        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6735        communication if it is known that only local entries will be set.
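
   Example Usage:
   A minimal sketch that assembles a global identity matrix with one locally owned row and column per
   process; comm is a placeholder, error checking with PetscCall() is omitted, and since the off-diagonal
   block is empty the oj/oa arrays are never read.
.vb
   PetscInt    i[2]  = {0, 1}, j[1] = {0};
   PetscScalar a[1]  = {1.0};
   PetscInt    oi[2] = {0, 0}, oj[1] = {0};
   PetscScalar oa[1] = {0.0};
   Mat         A;

   MatCreateMPIAIJWithSplitArrays(comm, 1, 1, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A);
   MatDestroy(&A);
.ve
   As stated above, the arrays remain owned by the caller and may be freed only after the matrix is destroyed.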
6736 
6737 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6738           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6739 @*/
6740 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6741 {
6742   Mat_MPIAIJ     *maij;
6743 
6744   PetscFunctionBegin;
6745   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6746   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6747   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6748   PetscCall(MatCreate(comm,mat));
6749   PetscCall(MatSetSizes(*mat,m,n,M,N));
6750   PetscCall(MatSetType(*mat,MATMPIAIJ));
6751   maij = (Mat_MPIAIJ*) (*mat)->data;
6752 
6753   (*mat)->preallocated = PETSC_TRUE;
6754 
6755   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6756   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6757 
6758   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6759   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6760 
6761   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6762   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6763   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6764   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6765   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6766   PetscFunctionReturn(0);
6767 }
6768 
6769 typedef struct {
6770   Mat       *mp;    /* intermediate products */
6771   PetscBool *mptmp; /* is the intermediate product temporary ? */
6772   PetscInt  cp;     /* number of intermediate products */
6773 
6774   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6775   PetscInt    *startsj_s,*startsj_r;
6776   PetscScalar *bufa;
6777   Mat         P_oth;
6778 
6779   /* may take advantage of merging product->B */
6780   Mat Bloc; /* B-local by merging diag and off-diag */
6781 
6782   /* cusparse does not support splitting the symbolic and numeric phases.
6783      When api_user is true, we don't need to update the numerical values
6784      of the temporary storage */
6785   PetscBool reusesym;
6786 
6787   /* support for COO values insertion */
6788   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6789   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6790   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6791   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6792   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6793   PetscMemType mtype;
6794 
6795   /* customization */
6796   PetscBool abmerge;
6797   PetscBool P_oth_bind;
6798 } MatMatMPIAIJBACKEND;
6799 
6800 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6801 {
6802   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6803   PetscInt            i;
6804 
6805   PetscFunctionBegin;
6806   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6807   PetscCall(PetscFree(mmdata->bufa));
6808   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6809   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6810   PetscCall(MatDestroy(&mmdata->P_oth));
6811   PetscCall(MatDestroy(&mmdata->Bloc));
6812   PetscCall(PetscSFDestroy(&mmdata->sf));
6813   for (i = 0; i < mmdata->cp; i++) {
6814     PetscCall(MatDestroy(&mmdata->mp[i]));
6815   }
6816   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6817   PetscCall(PetscFree(mmdata->own[0]));
6818   PetscCall(PetscFree(mmdata->own));
6819   PetscCall(PetscFree(mmdata->off[0]));
6820   PetscCall(PetscFree(mmdata->off));
6821   PetscCall(PetscFree(mmdata));
6822   PetscFunctionReturn(0);
6823 }
6824 
6825 /* Copy n selected entries of A, those at positions idx[], to v[].
6826    If idx is NULL, copy the whole data array of A to v[].
6827  */
6828 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6829 {
6830   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6831 
6832   PetscFunctionBegin;
6833   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6834   if (f) {
6835     PetscCall((*f)(A,n,idx,v));
6836   } else {
6837     const PetscScalar *vv;
6838 
6839     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6840     if (n && idx) {
6841       PetscScalar    *w = v;
6842       const PetscInt *oi = idx;
6843       PetscInt       j;
6844 
6845       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6846     } else {
6847       PetscCall(PetscArraycpy(v,vv,n));
6848     }
6849     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6850   }
6851   PetscFunctionReturn(0);
6852 }
6853 
6854 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6855 {
6856   MatMatMPIAIJBACKEND *mmdata;
6857   PetscInt            i,n_d,n_o;
6858 
6859   PetscFunctionBegin;
6860   MatCheckProduct(C,1);
6861   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6862   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6863   if (!mmdata->reusesym) { /* update temporary matrices */
6864     if (mmdata->P_oth) {
6865       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6866     }
6867     if (mmdata->Bloc) {
6868       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6869     }
6870   }
6871   mmdata->reusesym = PETSC_FALSE;
6872 
6873   for (i = 0; i < mmdata->cp; i++) {
6874     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6875     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6876   }
6877   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6878     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6879 
6880     if (mmdata->mptmp[i]) continue;
6881     if (noff) {
6882       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6883 
6884       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6885       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6886       n_o += noff;
6887       n_d += nown;
6888     } else {
6889       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6890 
6891       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6892       n_d += mm->nz;
6893     }
6894   }
6895   if (mmdata->hasoffproc) { /* offprocess insertion */
6896     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6897     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6898   }
6899   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6900   PetscFunctionReturn(0);
6901 }
6902 
6903 /* Support for Pt * A, A * P, or Pt * A * P */
6904 #define MAX_NUMBER_INTERMEDIATE 4
6905 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6906 {
6907   Mat_Product            *product = C->product;
6908   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6909   Mat_MPIAIJ             *a,*p;
6910   MatMatMPIAIJBACKEND    *mmdata;
6911   ISLocalToGlobalMapping P_oth_l2g = NULL;
6912   IS                     glob = NULL;
6913   const char             *prefix;
6914   char                   pprefix[256];
6915   const PetscInt         *globidx,*P_oth_idx;
6916   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6917   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6918   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6919                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6920                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6921   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
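  /* For example, in the AB case below, A_diag * P_diag uses rmapt = cmapt = 1 (consecutive rows/cols,
     offset by C's row/col start), while A_off * P_oth uses cmapt = 2 with cmapa[] pointing to P_oth's
     compacted local-to-global column table. */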
6922 
6923   MatProductType         ptype;
6924   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6925   PetscMPIInt            size;
6926 
6927   PetscFunctionBegin;
6928   MatCheckProduct(C,1);
6929   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6930   ptype = product->type;
6931   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6932     ptype = MATPRODUCT_AB;
6933     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6934   }
6935   switch (ptype) {
6936   case MATPRODUCT_AB:
6937     A = product->A;
6938     P = product->B;
6939     m = A->rmap->n;
6940     n = P->cmap->n;
6941     M = A->rmap->N;
6942     N = P->cmap->N;
6943     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6944     break;
6945   case MATPRODUCT_AtB:
6946     P = product->A;
6947     A = product->B;
6948     m = P->cmap->n;
6949     n = A->cmap->n;
6950     M = P->cmap->N;
6951     N = A->cmap->N;
6952     hasoffproc = PETSC_TRUE;
6953     break;
6954   case MATPRODUCT_PtAP:
6955     A = product->A;
6956     P = product->B;
6957     m = P->cmap->n;
6958     n = P->cmap->n;
6959     M = P->cmap->N;
6960     N = P->cmap->N;
6961     hasoffproc = PETSC_TRUE;
6962     break;
6963   default:
6964     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6965   }
6966   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6967   if (size == 1) hasoffproc = PETSC_FALSE;
6968 
6969   /* defaults */
6970   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6971     mp[i]    = NULL;
6972     mptmp[i] = PETSC_FALSE;
6973     rmapt[i] = -1;
6974     cmapt[i] = -1;
6975     rmapa[i] = NULL;
6976     cmapa[i] = NULL;
6977   }
6978 
6979   /* customization */
6980   PetscCall(PetscNew(&mmdata));
6981   mmdata->reusesym = product->api_user;
6982   if (ptype == MATPRODUCT_AB) {
6983     if (product->api_user) {
6984       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6985       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6986       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6987       PetscOptionsEnd();
6988     } else {
6989       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6990       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6991       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6992       PetscOptionsEnd();
6993     }
6994   } else if (ptype == MATPRODUCT_PtAP) {
6995     if (product->api_user) {
6996       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
6997       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6998       PetscOptionsEnd();
6999     } else {
7000       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7001       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7002       PetscOptionsEnd();
7003     }
7004   }
7005   a = (Mat_MPIAIJ*)A->data;
7006   p = (Mat_MPIAIJ*)P->data;
7007   PetscCall(MatSetSizes(C,m,n,M,N));
7008   PetscCall(PetscLayoutSetUp(C->rmap));
7009   PetscCall(PetscLayoutSetUp(C->cmap));
7010   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7011   PetscCall(MatGetOptionsPrefix(C,&prefix));
7012 
7013   cp   = 0;
7014   switch (ptype) {
7015   case MATPRODUCT_AB: /* A * P */
7016     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7017 
7018     /* A_diag * P_local (merged or not) */
7019     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7020       /* P is product->B */
7021       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7022       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7023       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7024       PetscCall(MatProductSetFill(mp[cp],product->fill));
7025       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7026       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7027       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7028       mp[cp]->product->api_user = product->api_user;
7029       PetscCall(MatProductSetFromOptions(mp[cp]));
7030       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7031       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7032       PetscCall(ISGetIndices(glob,&globidx));
7033       rmapt[cp] = 1;
7034       cmapt[cp] = 2;
7035       cmapa[cp] = globidx;
7036       mptmp[cp] = PETSC_FALSE;
7037       cp++;
7038     } else { /* A_diag * P_diag and A_diag * P_off */
7039       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7040       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7041       PetscCall(MatProductSetFill(mp[cp],product->fill));
7042       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7043       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7044       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7045       mp[cp]->product->api_user = product->api_user;
7046       PetscCall(MatProductSetFromOptions(mp[cp]));
7047       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7048       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7049       rmapt[cp] = 1;
7050       cmapt[cp] = 1;
7051       mptmp[cp] = PETSC_FALSE;
7052       cp++;
7053       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7054       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7055       PetscCall(MatProductSetFill(mp[cp],product->fill));
7056       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7057       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7058       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7059       mp[cp]->product->api_user = product->api_user;
7060       PetscCall(MatProductSetFromOptions(mp[cp]));
7061       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7062       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7063       rmapt[cp] = 1;
7064       cmapt[cp] = 2;
7065       cmapa[cp] = p->garray;
7066       mptmp[cp] = PETSC_FALSE;
7067       cp++;
7068     }
7069 
7070     /* A_off * P_other */
7071     if (mmdata->P_oth) {
7072       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7073       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7074       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7075       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7076       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7077       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7078       PetscCall(MatProductSetFill(mp[cp],product->fill));
7079       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7080       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7081       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7082       mp[cp]->product->api_user = product->api_user;
7083       PetscCall(MatProductSetFromOptions(mp[cp]));
7084       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7085       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7086       rmapt[cp] = 1;
7087       cmapt[cp] = 2;
7088       cmapa[cp] = P_oth_idx;
7089       mptmp[cp] = PETSC_FALSE;
7090       cp++;
7091     }
7092     break;
7093 
7094   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7095     /* A is product->B */
7096     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7097     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7098       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7099       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7100       PetscCall(MatProductSetFill(mp[cp],product->fill));
7101       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7102       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7103       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7104       mp[cp]->product->api_user = product->api_user;
7105       PetscCall(MatProductSetFromOptions(mp[cp]));
7106       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7107       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7108       PetscCall(ISGetIndices(glob,&globidx));
7109       rmapt[cp] = 2;
7110       rmapa[cp] = globidx;
7111       cmapt[cp] = 2;
7112       cmapa[cp] = globidx;
7113       mptmp[cp] = PETSC_FALSE;
7114       cp++;
7115     } else {
7116       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7117       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7118       PetscCall(MatProductSetFill(mp[cp],product->fill));
7119       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7120       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7121       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7122       mp[cp]->product->api_user = product->api_user;
7123       PetscCall(MatProductSetFromOptions(mp[cp]));
7124       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7125       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7126       PetscCall(ISGetIndices(glob,&globidx));
7127       rmapt[cp] = 1;
7128       cmapt[cp] = 2;
7129       cmapa[cp] = globidx;
7130       mptmp[cp] = PETSC_FALSE;
7131       cp++;
7132       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7133       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7134       PetscCall(MatProductSetFill(mp[cp],product->fill));
7135       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7136       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7137       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7138       mp[cp]->product->api_user = product->api_user;
7139       PetscCall(MatProductSetFromOptions(mp[cp]));
7140       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7141       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7142       rmapt[cp] = 2;
7143       rmapa[cp] = p->garray;
7144       cmapt[cp] = 2;
7145       cmapa[cp] = globidx;
7146       mptmp[cp] = PETSC_FALSE;
7147       cp++;
7148     }
7149     break;
7150   case MATPRODUCT_PtAP:
7151     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7152     /* P is product->B */
7153     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7154     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7155     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7156     PetscCall(MatProductSetFill(mp[cp],product->fill));
7157     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7158     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7159     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7160     mp[cp]->product->api_user = product->api_user;
7161     PetscCall(MatProductSetFromOptions(mp[cp]));
7162     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7163     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7164     PetscCall(ISGetIndices(glob,&globidx));
7165     rmapt[cp] = 2;
7166     rmapa[cp] = globidx;
7167     cmapt[cp] = 2;
7168     cmapa[cp] = globidx;
7169     mptmp[cp] = PETSC_FALSE;
7170     cp++;
7171     if (mmdata->P_oth) {
7172       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7173       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7174       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7175       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7176       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7177       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7178       PetscCall(MatProductSetFill(mp[cp],product->fill));
7179       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7180       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7181       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7182       mp[cp]->product->api_user = product->api_user;
7183       PetscCall(MatProductSetFromOptions(mp[cp]));
7184       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7185       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7186       mptmp[cp] = PETSC_TRUE;
7187       cp++;
7188       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7189       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7190       PetscCall(MatProductSetFill(mp[cp],product->fill));
7191       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7192       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7193       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7194       mp[cp]->product->api_user = product->api_user;
7195       PetscCall(MatProductSetFromOptions(mp[cp]));
7196       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7197       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7198       rmapt[cp] = 2;
7199       rmapa[cp] = globidx;
7200       cmapt[cp] = 2;
7201       cmapa[cp] = P_oth_idx;
7202       mptmp[cp] = PETSC_FALSE;
7203       cp++;
7204     }
7205     break;
7206   default:
7207     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7208   }
7209   /* sanity check */
7210   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7211 
7212   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7213   for (i = 0; i < cp; i++) {
7214     mmdata->mp[i]    = mp[i];
7215     mmdata->mptmp[i] = mptmp[i];
7216   }
7217   mmdata->cp = cp;
7218   C->product->data       = mmdata;
7219   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7220   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7221 
7222   /* memory type */
7223   mmdata->mtype = PETSC_MEMTYPE_HOST;
7224   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7225   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7226   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7227   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7228 
7229   /* prepare coo coordinates for values insertion */
7230 
7231   /* count total nonzeros of those intermediate seqaij Mats
7232     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7233     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7234     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7235   */
7236   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7237     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7238     if (mptmp[cp]) continue;
7239     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7240       const PetscInt *rmap = rmapa[cp];
7241       const PetscInt mr = mp[cp]->rmap->n;
7242       const PetscInt rs = C->rmap->rstart;
7243       const PetscInt re = C->rmap->rend;
7244       const PetscInt *ii  = mm->i;
7245       for (i = 0; i < mr; i++) {
7246         const PetscInt gr = rmap[i];
7247         const PetscInt nz = ii[i+1] - ii[i];
7248         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7249         else ncoo_oown += nz; /* this row is local */
7250       }
7251     } else ncoo_d += mm->nz;
7252   }
7253 
7254   /*
7255     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7256 
7257     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7258 
7259     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7260 
7261     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7262     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7263     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7264 
7265     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7266     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7267   */
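  /* A hypothetical illustration with two non-temporary products: if mp[0] contributes 3 offproc nonzeros
     and mp[1] contributes 2, then off[0] points to an index array of length 5, off[1] = off[0]+3 and
     off[2] = off[0]+5; own[] is laid out the same way for the locally inserted nonzeros. */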
7268   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7269   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7270 
7271   /* gather (i,j) of nonzeros inserted by remote procs */
7272   if (hasoffproc) {
7273     PetscSF  msf;
7274     PetscInt ncoo2,*coo_i2,*coo_j2;
7275 
7276     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7277     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7278     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7279 
7280     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7281       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7282       PetscInt   *idxoff = mmdata->off[cp];
7283       PetscInt   *idxown = mmdata->own[cp];
7284       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7285         const PetscInt *rmap = rmapa[cp];
7286         const PetscInt *cmap = cmapa[cp];
7287         const PetscInt *ii  = mm->i;
7288         PetscInt       *coi = coo_i + ncoo_o;
7289         PetscInt       *coj = coo_j + ncoo_o;
7290         const PetscInt mr = mp[cp]->rmap->n;
7291         const PetscInt rs = C->rmap->rstart;
7292         const PetscInt re = C->rmap->rend;
7293         const PetscInt cs = C->cmap->rstart;
7294         for (i = 0; i < mr; i++) {
7295           const PetscInt *jj = mm->j + ii[i];
7296           const PetscInt gr  = rmap[i];
7297           const PetscInt nz  = ii[i+1] - ii[i];
7298           if (gr < rs || gr >= re) { /* this is an offproc row */
7299             for (j = ii[i]; j < ii[i+1]; j++) {
7300               *coi++ = gr;
7301               *idxoff++ = j;
7302             }
7303             if (!cmapt[cp]) { /* already global */
7304               for (j = 0; j < nz; j++) *coj++ = jj[j];
7305             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7306               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7307             } else { /* offdiag */
7308               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7309             }
7310             ncoo_o += nz;
7311           } else { /* this is a local row */
7312             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7313           }
7314         }
7315       }
7316       mmdata->off[cp + 1] = idxoff;
7317       mmdata->own[cp + 1] = idxown;
7318     }
7319 
7320     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7321     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7322     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7323     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7324     ncoo = ncoo_d + ncoo_oown + ncoo2;
7325     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7326     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7327     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7328     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7329     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7330     PetscCall(PetscFree2(coo_i,coo_j));
7331     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7332     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7333     coo_i = coo_i2;
7334     coo_j = coo_j2;
7335   } else { /* no offproc values insertion */
7336     ncoo = ncoo_d;
7337     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7338 
7339     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7340     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7341     PetscCall(PetscSFSetUp(mmdata->sf));
7342   }
7343   mmdata->hasoffproc = hasoffproc;
7344 
7345   /* gather (i,j) of nonzeros inserted locally */
7346   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7347     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7348     PetscInt       *coi = coo_i + ncoo_d;
7349     PetscInt       *coj = coo_j + ncoo_d;
7350     const PetscInt *jj  = mm->j;
7351     const PetscInt *ii  = mm->i;
7352     const PetscInt *cmap = cmapa[cp];
7353     const PetscInt *rmap = rmapa[cp];
7354     const PetscInt mr = mp[cp]->rmap->n;
7355     const PetscInt rs = C->rmap->rstart;
7356     const PetscInt re = C->rmap->rend;
7357     const PetscInt cs = C->cmap->rstart;
7358 
7359     if (mptmp[cp]) continue;
7360     if (rmapt[cp] == 1) { /* consecutive rows */
7361       /* fill coo_i */
7362       for (i = 0; i < mr; i++) {
7363         const PetscInt gr = i + rs;
7364         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7365       }
7366       /* fill coo_j */
7367       if (!cmapt[cp]) { /* type-0, already global */
7368         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7369       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7370         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7371       } else { /* type-2, local to global for sparse columns */
7372         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7373       }
7374       ncoo_d += mm->nz;
7375     } else if (rmapt[cp] == 2) { /* sparse rows */
7376       for (i = 0; i < mr; i++) {
7377         const PetscInt *jj = mm->j + ii[i];
7378         const PetscInt gr  = rmap[i];
7379         const PetscInt nz  = ii[i+1] - ii[i];
7380         if (gr >= rs && gr < re) { /* local rows */
7381           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7382           if (!cmapt[cp]) { /* type-0, already global */
7383             for (j = 0; j < nz; j++) *coj++ = jj[j];
7384           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7385             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7386           } else { /* type-2, local to global for sparse columns */
7387             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7388           }
7389           ncoo_d += nz;
7390         }
7391       }
7392     }
7393   }
7394   if (glob) {
7395     PetscCall(ISRestoreIndices(glob,&globidx));
7396   }
7397   PetscCall(ISDestroy(&glob));
7398   if (P_oth_l2g) {
7399     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7400   }
7401   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7402   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7403   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7404 
7405   /* preallocate with COO data */
7406   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7407   PetscCall(PetscFree2(coo_i,coo_j));
7408   PetscFunctionReturn(0);
7409 }
7410 
7411 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7412 {
7413   Mat_Product *product = mat->product;
7414 #if defined(PETSC_HAVE_DEVICE)
7415   PetscBool    match   = PETSC_FALSE;
7416   PetscBool    usecpu  = PETSC_FALSE;
7417 #else
7418   PetscBool    match   = PETSC_TRUE;
7419 #endif
7420 
7421   PetscFunctionBegin;
7422   MatCheckProduct(mat,1);
7423 #if defined(PETSC_HAVE_DEVICE)
7424   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7425     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7426   }
7427   if (match) { /* we can always fallback to the CPU if requested */
7428     switch (product->type) {
7429     case MATPRODUCT_AB:
7430       if (product->api_user) {
7431         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7432         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7433         PetscOptionsEnd();
7434       } else {
7435         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7436         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7437         PetscOptionsEnd();
7438       }
7439       break;
7440     case MATPRODUCT_AtB:
7441       if (product->api_user) {
7442         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7443         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7444         PetscOptionsEnd();
7445       } else {
7446         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7447         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7448         PetscOptionsEnd();
7449       }
7450       break;
7451     case MATPRODUCT_PtAP:
7452       if (product->api_user) {
7453         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7454         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7455         PetscOptionsEnd();
7456       } else {
7457         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7458         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7459         PetscOptionsEnd();
7460       }
7461       break;
7462     default:
7463       break;
7464     }
7465     match = (PetscBool)!usecpu;
7466   }
7467 #endif
7468   if (match) {
7469     switch (product->type) {
7470     case MATPRODUCT_AB:
7471     case MATPRODUCT_AtB:
7472     case MATPRODUCT_PtAP:
7473       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7474       break;
7475     default:
7476       break;
7477     }
7478   }
7479   /* fallback to MPIAIJ ops */
7480   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7481   PetscFunctionReturn(0);
7482 }
7483 
7484 /*
7485     Special version for direct calls from Fortran
7486 */
7487 #include <petsc/private/fortranimpl.h>
7488 
7489 /* Change these macros so they can be used in a void function */
7490 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7491 #undef  PetscCall
7492 #define PetscCall(...) do {                                                                    \
7493     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7494     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7495       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7496       return;                                                                                  \
7497     }                                                                                          \
7498   } while (0)
7499 
7500 #undef SETERRQ
7501 #define SETERRQ(comm,ierr,...) do {                                                            \
7502     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7503     return;                                                                                    \
7504   } while (0)
7505 
7506 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7507 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7508 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7509 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7510 #else
7511 #endif
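/* The symbol name is adjusted above to match the Fortran compiler's name mangling: all caps,
   lower case with no trailing underscore, or (the default branch) lower case with a trailing underscore */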
7512 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7513 {
7514   Mat          mat  = *mmat;
7515   PetscInt     m    = *mm, n = *mn;
7516   InsertMode   addv = *maddv;
7517   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7518   PetscScalar  value;
7519 
7520   MatCheckPreallocated(mat,1);
7521   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7522   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7523   {
7524     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7525     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7526     PetscBool roworiented = aij->roworiented;
7527 
7528     /* Variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros below */
7529     Mat        A                    = aij->A;
7530     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7531     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7532     MatScalar  *aa;
7533     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7534     Mat        B                    = aij->B;
7535     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7536     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7537     MatScalar  *ba;
7538     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it unconditionally
7539      * because "#if defined" cannot be used inside a macro. */
7540     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7541 
7542     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7543     PetscInt  nonew = a->nonew;
7544     MatScalar *ap1,*ap2;
7545 
7546     PetscFunctionBegin;
7547     PetscCall(MatSeqAIJGetArray(A,&aa));
7548     PetscCall(MatSeqAIJGetArray(B,&ba));
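    /* For each (im[i],in[j]) pair: entries in locally owned rows go into the diagonal block A (columns in
       [cstart,cend)) or the off-diagonal block B (all other columns); entries in rows owned by other
       processes are stashed for communication at assembly time, unless donotstash is set. */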
7549     for (i=0; i<m; i++) {
7550       if (im[i] < 0) continue;
7551       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7552       if (im[i] >= rstart && im[i] < rend) {
7553         row      = im[i] - rstart;
7554         lastcol1 = -1;
7555         rp1      = aj + ai[row];
7556         ap1      = aa + ai[row];
7557         rmax1    = aimax[row];
7558         nrow1    = ailen[row];
7559         low1     = 0;
7560         high1    = nrow1;
7561         lastcol2 = -1;
7562         rp2      = bj + bi[row];
7563         ap2      = ba + bi[row];
7564         rmax2    = bimax[row];
7565         nrow2    = bilen[row];
7566         low2     = 0;
7567         high2    = nrow2;
7568 
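        /* v is indexed row-major (v[i*n+j]) or column-major (v[i+j*m]) depending on the row orientation
           selected with MatSetOption(mat,MAT_ROW_ORIENTED,...) */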
7569         for (j=0; j<n; j++) {
7570           if (roworiented) value = v[i*n+j];
7571           else value = v[i+j*m];
7572           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7573           if (in[j] >= cstart && in[j] < cend) {
7574             col = in[j] - cstart;
7575             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7576           } else if (in[j] < 0) continue;
7577           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7578             /* The extra braces around SETERRQ() are required for --with-errorchecking=0 because of the 'else' clause that follows */
7579             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
7580           } else {
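            /* Off-diagonal entry: once the matrix has been assembled, B stores compacted local column indices,
               so translate the global column in[j] through colmap (stored 1-based, hence the decrement); if the
               column is not found and new nonzeros are allowed, disassemble B back to global column indices */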
7581             if (mat->was_assembled) {
7582               if (!aij->colmap) {
7583                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
7584               }
7585 #if defined(PETSC_USE_CTABLE)
7586               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
7587               col--;
7588 #else
7589               col = aij->colmap[in[j]] - 1;
7590 #endif
7591               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7592                 PetscCall(MatDisAssemble_MPIAIJ(mat));
7593                 col  =  in[j];
7594                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7595                 B        = aij->B;
7596                 b        = (Mat_SeqAIJ*)B->data;
7597                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7598                 rp2      = bj + bi[row];
7599                 ap2      = ba + bi[row];
7600                 rmax2    = bimax[row];
7601                 nrow2    = bilen[row];
7602                 low2     = 0;
7603                 high2    = nrow2;
7604                 bm       = aij->B->rmap->n;
7605                 ba       = b->a;
7606                 inserted = PETSC_FALSE;
7607               }
7608             } else col = in[j];
7609             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7610           }
7611         }
7612       } else if (!aij->donotstash) {
7613         if (roworiented) {
7614           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7615         } else {
7616           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7617         }
7618       }
7619     }
7620     PetscCall(MatSeqAIJRestoreArray(A,&aa));
7621     PetscCall(MatSeqAIJRestoreArray(B,&ba));
7622   }
7623   PetscFunctionReturnVoid();
7624 }
7625 /* Undefine these macros here since they were redefined above. No PETSc functions should be
7626  * defined past this point, because the original macro definitions cannot be
7627  * recovered */
7628 #undef PetscCall
7629 #undef SETERRQ
7630
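/*
   For reference, a minimal sketch of the equivalent C-side usage (illustrative only, not part of this
   file; the sizes, indices, and values are arbitrary). The Fortran entry point above performs the work
   of MatSetValues() for a MATMPIAIJ matrix without returning an error code:

     Mat         A;
     PetscInt    row = 0, col = 0;
     PetscScalar val = 1.0;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10));
     PetscCall(MatSetType(A,MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(A,1,NULL,1,NULL));
     PetscCall(MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));
*/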