xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 30028ea28a00c337e8f392cedcbc558844c6540b)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
10 {
11   Mat            B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
15   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
16   PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
17   PetscFunctionReturn(0);
18 }
19 
20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
21 {
22   Mat            B;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
26   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
27   PetscCall(MatDestroy(&B));
28   PetscFunctionReturn(0);
29 }
30 
31 /*MC
32    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
35    and MATMPIAIJ otherwise.  As a result, for single process communicators,
36   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
42 
43   Developer Notes:
44     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
45    enough rows with identical nonzero structure exist.
46 
47   Level: beginner
48 
49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
50 M*/
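/*
   Example usage (a minimal sketch; the global sizes M and N and the preallocation counts below
   are illustrative assumptions, not requirements):

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
     PetscCall(MatSetType(A,MATAIJ));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatSeqAIJSetPreallocation(A,5,NULL));          calling both preallocation routines is safe;
     PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,2,NULL));   the one that does not match the type is ignored
     ... insert values with MatSetValues() ...
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/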
51 
52 /*MC
53    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
54 
55    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
56    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
57    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
58   for communicators controlling multiple processes.  It is recommended that you call both of
59   the above preallocation routines for simplicity.
60 
61    Options Database Keys:
62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
63 
64   Level: beginner
65 
66 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
67 M*/
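/*
   For example, an existing AIJ code can be switched to this format at run time (a sketch; "./myapp"
   stands for any PETSc program that calls MatSetFromOptions()):

     ./myapp -mat_type aijcrl
*/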
68 
69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
70 {
71   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
72 
73   PetscFunctionBegin;
74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
75   A->boundtocpu = flg;
76 #endif
77   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
78   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
79 
80   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
81    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
82    * to differ from the parent matrix. */
83   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
84   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
85 
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
90 {
91   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
92 
93   PetscFunctionBegin;
94   if (mat->A) {
95     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
96     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
102 {
103   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
104   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
105   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
106   const PetscInt  *ia,*ib;
107   const MatScalar *aa,*bb,*aav,*bav;
108   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
109   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
110 
111   PetscFunctionBegin;
112   *keptrows = NULL;
113 
114   ia   = a->i;
115   ib   = b->i;
116   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
117   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
118   for (i=0; i<m; i++) {
119     na = ia[i+1] - ia[i];
120     nb = ib[i+1] - ib[i];
121     if (!na && !nb) {
122       cnt++;
123       goto ok1;
124     }
125     aa = aav + ia[i];
126     for (j=0; j<na; j++) {
127       if (aa[j] != 0.0) goto ok1;
128     }
129     bb = bav + ib[i];
130     for (j=0; j <nb; j++) {
131       if (bb[j] != 0.0) goto ok1;
132     }
133     cnt++;
134 ok1:;
135   }
136   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
137   if (!n0rows) {
138     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
140     PetscFunctionReturn(0);
141   }
142   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
143   cnt  = 0;
144   for (i=0; i<m; i++) {
145     na = ia[i+1] - ia[i];
146     nb = ib[i+1] - ib[i];
147     if (!na && !nb) continue;
148     aa = aav + ia[i];
149     for (j=0; j<na;j++) {
150       if (aa[j] != 0.0) {
151         rows[cnt++] = rstart + i;
152         goto ok2;
153       }
154     }
155     bb = bav + ib[i];
156     for (j=0; j<nb; j++) {
157       if (bb[j] != 0.0) {
158         rows[cnt++] = rstart + i;
159         goto ok2;
160       }
161     }
162 ok2:;
163   }
164   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
165   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
167   PetscFunctionReturn(0);
168 }
169 
170 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
171 {
172   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
173   PetscBool         cong;
174 
175   PetscFunctionBegin;
176   PetscCall(MatHasCongruentLayouts(Y,&cong));
177   if (Y->assembled && cong) {
178     PetscCall(MatDiagonalSet(aij->A,D,is));
179   } else {
180     PetscCall(MatDiagonalSet_Default(Y,D,is));
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
188   PetscInt       i,rstart,nrows,*rows;
189 
190   PetscFunctionBegin;
191   *zrows = NULL;
192   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
193   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
194   for (i=0; i<nrows; i++) rows[i] += rstart;
195   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
200 {
201   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
202   PetscInt          i,m,n,*garray = aij->garray;
203   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
204   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
205   PetscReal         *work;
206   const PetscScalar *dummy;
207 
208   PetscFunctionBegin;
209   PetscCall(MatGetSize(A,&m,&n));
210   PetscCall(PetscCalloc1(n,&work));
211   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
212   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
213   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
214   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
215   if (type == NORM_2) {
216     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
217       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
218     }
219     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
220       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
221     }
222   } else if (type == NORM_1) {
223     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
224       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
225     }
226     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
227       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
228     }
229   } else if (type == NORM_INFINITY) {
230     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
231       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
232     }
233     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
234       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
235     }
236   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
237     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
238       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
239     }
240     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
241       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
242     }
243   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
244     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
245       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
246     }
247     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
248       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
249     }
250   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
251   if (type == NORM_INFINITY) {
252     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
253   } else {
254     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
255   }
256   PetscCall(PetscFree(work));
257   if (type == NORM_2) {
258     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
259   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
260     for (i=0; i<n; i++) reductions[i] /= m;
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
266 {
267   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
268   IS              sis,gis;
269   const PetscInt  *isis,*igis;
270   PetscInt        n,*iis,nsis,ngis,rstart,i;
271 
272   PetscFunctionBegin;
273   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
274   PetscCall(MatFindNonzeroRows(a->B,&gis));
275   PetscCall(ISGetSize(gis,&ngis));
276   PetscCall(ISGetSize(sis,&nsis));
277   PetscCall(ISGetIndices(sis,&isis));
278   PetscCall(ISGetIndices(gis,&igis));
279 
280   PetscCall(PetscMalloc1(ngis+nsis,&iis));
281   PetscCall(PetscArraycpy(iis,igis,ngis));
282   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
283   n    = ngis + nsis;
284   PetscCall(PetscSortRemoveDupsInt(&n,iis));
285   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
286   for (i=0; i<n; i++) iis[i] += rstart;
287   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
288 
289   PetscCall(ISRestoreIndices(sis,&isis));
290   PetscCall(ISRestoreIndices(gis,&igis));
291   PetscCall(ISDestroy(&sis));
292   PetscCall(ISDestroy(&gis));
293   PetscFunctionReturn(0);
294 }
295 
296 /*
297   Local utility routine that creates a mapping from the global column
298 number to the local number in the off-diagonal part of the local
299 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
300 a slightly higher hash-table cost; without it, it is not scalable (each process
301 has an order-N integer array) but is fast to access.
302 */
303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscInt       n = aij->B->cmap->n,i;
307 
308   PetscFunctionBegin;
309   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
310 #if defined(PETSC_USE_CTABLE)
311   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
312   for (i=0; i<n; i++) {
313     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
314   }
315 #else
316   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
317   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
318   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
319 #endif
320   PetscFunctionReturn(0);
321 }
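/*
   A sketch of how the colmap built above is consulted later in this file (e.g. in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ()) to translate a global column index gcol (a placeholder name) into a local
   column index of the off-diagonal block; a result of col < 0 means gcol is not present there:

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscTableFind(aij->colmap,gcol+1,&col));
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/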
322 
323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
324 { \
325     if (col <= lastcol1)  low1 = 0;     \
326     else                 high1 = nrow1; \
327     lastcol1 = col;\
328     while (high1-low1 > 5) { \
329       t = (low1+high1)/2; \
330       if (rp1[t] > col) high1 = t; \
331       else              low1  = t; \
332     } \
333       for (_i=low1; _i<high1; _i++) { \
334         if (rp1[_i] > col) break; \
335         if (rp1[_i] == col) { \
336           if (addv == ADD_VALUES) { \
337             ap1[_i] += value;   \
338             /* Not sure whether LogFlops will slow down the code or not */ \
339             (void)PetscLogFlops(1.0);   \
340            } \
341           else                    ap1[_i] = value; \
342           goto a_noinsert; \
343         } \
344       }  \
345       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
346       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
347       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
348       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
349       N = nrow1++ - 1; a->nz++; high1++; \
350       /* shift up all the later entries in this row */ \
351       PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
352       PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
353       rp1[_i] = col;  \
354       ap1[_i] = value;  \
355       A->nonzerostate++;\
356       a_noinsert: ; \
357       ailen[row] = nrow1; \
358 }
359 
360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
361   { \
362     if (col <= lastcol2) low2 = 0;                        \
363     else high2 = nrow2;                                   \
364     lastcol2 = col;                                       \
365     while (high2-low2 > 5) {                              \
366       t = (low2+high2)/2;                                 \
367       if (rp2[t] > col) high2 = t;                        \
368       else             low2  = t;                         \
369     }                                                     \
370     for (_i=low2; _i<high2; _i++) {                       \
371       if (rp2[_i] > col) break;                           \
372       if (rp2[_i] == col) {                               \
373         if (addv == ADD_VALUES) {                         \
374           ap2[_i] += value;                               \
375           (void)PetscLogFlops(1.0);                       \
376         }                                                 \
377         else                    ap2[_i] = value;          \
378         goto b_noinsert;                                  \
379       }                                                   \
380     }                                                     \
381     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
382     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
383     PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
384     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
385     N = nrow2++ - 1; b->nz++; high2++;                    \
386     /* shift up all the later entries in this row */      \
387     PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
388     PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
389     rp2[_i] = col;                                        \
390     ap2[_i] = value;                                      \
391     B->nonzerostate++;                                    \
392     b_noinsert: ;                                         \
393     bilen[row] = nrow2;                                   \
394   }
395 
396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
397 {
398   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
399   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
400   PetscInt       l,*garray = mat->garray,diag;
401   PetscScalar    *aa,*ba;
402 
403   PetscFunctionBegin;
404   /* code only works for square matrices A */
405 
406   /* find size of row to the left of the diagonal part */
407   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
408   row  = row - diag;
409   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
410     if (garray[b->j[b->i[row]+l]] > diag) break;
411   }
412   if (l) {
413     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
414     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
415     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
416   }
417 
418   /* diagonal part */
419   if (a->i[row+1]-a->i[row]) {
420     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
421     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
422     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
423   }
424 
425   /* right of diagonal part */
426   if (b->i[row+1]-b->i[row]-l) {
427     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
428     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
429     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
430   }
431   PetscFunctionReturn(0);
432 }
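/*
   Note on the input layout assumed above: v[] must contain the values of all stored nonzeros of the
   global row, ordered by global column index, i.e. (for the square, congruent-layout case this routine
   supports)

     [ off-diagonal entries with column < rstart | diagonal-block entries | off-diagonal entries with column >= rend ]

   which is why the copy is split into the three PetscArraycpy() calls.
*/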
433 
434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
435 {
436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
437   PetscScalar    value = 0.0;
438   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
439   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
440   PetscBool      roworiented = aij->roworiented;
441 
442   /* Some Variables required in the macro */
443   Mat        A                    = aij->A;
444   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
445   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
446   PetscBool  ignorezeroentries    = a->ignorezeroentries;
447   Mat        B                    = aij->B;
448   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
449   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
450   MatScalar  *aa,*ba;
451   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
452   PetscInt   nonew;
453   MatScalar  *ap1,*ap2;
454 
455   PetscFunctionBegin;
456   PetscCall(MatSeqAIJGetArray(A,&aa));
457   PetscCall(MatSeqAIJGetArray(B,&ba));
458   for (i=0; i<m; i++) {
459     if (im[i] < 0) continue;
460     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
461     if (im[i] >= rstart && im[i] < rend) {
462       row      = im[i] - rstart;
463       lastcol1 = -1;
464       rp1      = aj + ai[row];
465       ap1      = aa + ai[row];
466       rmax1    = aimax[row];
467       nrow1    = ailen[row];
468       low1     = 0;
469       high1    = nrow1;
470       lastcol2 = -1;
471       rp2      = bj + bi[row];
472       ap2      = ba + bi[row];
473       rmax2    = bimax[row];
474       nrow2    = bilen[row];
475       low2     = 0;
476       high2    = nrow2;
477 
478       for (j=0; j<n; j++) {
479         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
480         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
481         if (in[j] >= cstart && in[j] < cend) {
482           col   = in[j] - cstart;
483           nonew = a->nonew;
484           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
485         } else if (in[j] < 0) {
486           continue;
487         } else {
488           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
489           if (mat->was_assembled) {
490             if (!aij->colmap) {
491               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
492             }
493 #if defined(PETSC_USE_CTABLE)
494             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
495             col--;
496 #else
497             col = aij->colmap[in[j]] - 1;
498 #endif
499             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
500               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
501               col  =  in[j];
502               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
503               B        = aij->B;
504               b        = (Mat_SeqAIJ*)B->data;
505               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
506               rp2      = bj + bi[row];
507               ap2      = ba + bi[row];
508               rmax2    = bimax[row];
509               nrow2    = bilen[row];
510               low2     = 0;
511               high2    = nrow2;
512               bm       = aij->B->rmap->n;
513               ba       = b->a;
514             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
515               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
516                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
517               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
518             }
519           } else col = in[j];
520           nonew = b->nonew;
521           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
522         }
523       }
524     } else {
525       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
526       if (!aij->donotstash) {
527         mat->assembled = PETSC_FALSE;
528         if (roworiented) {
529           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
530         } else {
531           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
532         }
533       }
534     }
535   }
536   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
537   PetscCall(MatSeqAIJRestoreArray(B,&ba));
538   PetscFunctionReturn(0);
539 }
540 
541 /*
542     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
543     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
544     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
545 */
546 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
547 {
548   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
549   Mat            A           = aij->A; /* diagonal part of the matrix */
550   Mat            B           = aij->B; /* offdiagonal part of the matrix */
551   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
552   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
554   PetscInt       *ailen      = a->ilen,*aj = a->j;
555   PetscInt       *bilen      = b->ilen,*bj = b->j;
556   PetscInt       am          = aij->A->rmap->n,j;
557   PetscInt       diag_so_far = 0,dnz;
558   PetscInt       offd_so_far = 0,onz;
559 
560   PetscFunctionBegin;
561   /* Iterate over all rows of the matrix */
562   for (j=0; j<am; j++) {
563     dnz = onz = 0;
564     /*  Iterate over all non-zero columns of the current row */
565     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
566       /* If column is in the diagonal */
567       if (mat_j[col] >= cstart && mat_j[col] < cend) {
568         aj[diag_so_far++] = mat_j[col] - cstart;
569         dnz++;
570       } else { /* off-diagonal entries */
571         bj[offd_so_far++] = mat_j[col];
572         onz++;
573       }
574     }
575     ailen[j] = dnz;
576     bilen[j] = onz;
577   }
578   PetscFunctionReturn(0);
579 }
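/*
   A small worked example of the splitting performed above (the sizes are chosen purely for
   illustration): with cstart = 4 and cend = 8, the local CSR input

     mat_i = {0, 3, 5}
     mat_j = {1, 4, 9,   5, 6}

   is split as

     row 0: aj gets {0} (= 4 - cstart),  bj gets {1, 9},  ailen[0] = 1, bilen[0] = 2
     row 1: aj gets {1, 2},              bj gets {},      ailen[1] = 2, bilen[1] = 0

   Note that bj still holds global column indices here; they are compacted to local indices later,
   by MatSetUpMultiply_MPIAIJ() during assembly.
*/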
580 
581 /*
582     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
583     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
584     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
585     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
586     would not be valid and the more complex MatSetValues_MPIAIJ has to be used.
587 */
588 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
589 {
590   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
591   Mat            A      = aij->A; /* diagonal part of the matrix */
592   Mat            B      = aij->B; /* offdiagonal part of the matrix */
593   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
594   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
595   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
597   PetscInt       *ailen = a->ilen,*aj = a->j;
598   PetscInt       *bilen = b->ilen,*bj = b->j;
599   PetscInt       am     = aij->A->rmap->n,j;
600   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
601   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
602   PetscScalar    *aa = a->a,*ba = b->a;
603 
604   PetscFunctionBegin;
605   /* Iterate over all rows of the matrix */
606   for (j=0; j<am; j++) {
607     dnz_row = onz_row = 0;
608     rowstart_offd = full_offd_i[j];
609     rowstart_diag = full_diag_i[j];
610     /*  Iterate over all non-zero columns of the current row */
611     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
612       /* If column is in the diagonal */
613       if (mat_j[col] >= cstart && mat_j[col] < cend) {
614         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
615         aa[rowstart_diag+dnz_row] = mat_a[col];
616         dnz_row++;
617       } else { /* off-diagonal entries */
618         bj[rowstart_offd+onz_row] = mat_j[col];
619         ba[rowstart_offd+onz_row] = mat_a[col];
620         onz_row++;
621       }
622     }
623     ailen[j] = dnz_row;
624     bilen[j] = onz_row;
625   }
626   PetscFunctionReturn(0);
627 }
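/*
   For reference, a user-level way to build an MPIAIJ matrix directly from process-local CSR data in
   this layout is MatCreateMPIAIJWithArrays() (a sketch; m, n, i, j and a stand for the caller's local
   row count, local column count and CSR arrays, and are assumptions of this example):

     Mat C;
     PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&C));
*/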
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* negative row */
638     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* negative column */
643         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
647         } else {
648           if (!aij->colmap) {
649             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
650           }
651 #if defined(PETSC_USE_CTABLE)
652           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   PetscInt       nstash,reallocs;
672 
673   PetscFunctionBegin;
674   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
677   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
678   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
679   PetscFunctionReturn(0);
680 }
681 
682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscMPIInt    n;
686   PetscInt       i,j,rstart,ncols,flg;
687   PetscInt       *row,*col;
688   PetscBool      other_disassembled;
689   PetscScalar    *val;
690 
691   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
692 
693   PetscFunctionBegin;
694   if (!aij->donotstash && !mat->nooffprocentries) {
695     while (1) {
696       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
697       if (!flg) break;
698 
699       for (i=0; i<n;) {
700         /* Now identify the consecutive vals belonging to the same row */
701         for (j=i,rstart=row[j]; j<n; j++) {
702           if (row[j] != rstart) break;
703         }
704         if (j < n) ncols = j-i;
705         else       ncols = n-i;
706         /* Now assemble all these values with a single function call */
707         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
708         i    = j;
709       }
710     }
711     PetscCall(MatStashScatterEnd_Private(&mat->stash));
712   }
713 #if defined(PETSC_HAVE_DEVICE)
714   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
715   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
716   if (mat->boundtocpu) {
717     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
718     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
719   }
720 #endif
721   PetscCall(MatAssemblyBegin(aij->A,mode));
722   PetscCall(MatAssemblyEnd(aij->A,mode));
723 
724   /* determine if any process has disassembled; if so, we must
725      also disassemble ourselves, in order that we may reassemble. */
726   /*
727      if nonzero structure of submatrix B cannot change then we know that
728      no processor disassembled thus we can skip this stuff
729   */
730   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
731     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
732     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has a reduced off-diag B with local col ids, but globally it does not */
733       PetscCall(MatDisAssemble_MPIAIJ(mat));
734     }
735   }
736   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
737     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
738   }
739   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
740 #if defined(PETSC_HAVE_DEVICE)
741   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
742 #endif
743   PetscCall(MatAssemblyBegin(aij->B,mode));
744   PetscCall(MatAssemblyEnd(aij->B,mode));
745 
746   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
747 
748   aij->rowvalues = NULL;
749 
750   PetscCall(VecDestroy(&aij->diag));
751 
752   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
753   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
754     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
755     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
756   }
757 #if defined(PETSC_HAVE_DEVICE)
758   mat->offloadmask = PETSC_OFFLOAD_BOTH;
759 #endif
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
764 {
765   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
766 
767   PetscFunctionBegin;
768   PetscCall(MatZeroEntries(l->A));
769   PetscCall(MatZeroEntries(l->B));
770   PetscFunctionReturn(0);
771 }
772 
773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
774 {
775   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
776   PetscObjectState sA, sB;
777   PetscInt        *lrows;
778   PetscInt         r, len;
779   PetscBool        cong, lch, gch;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
784   PetscCall(MatHasCongruentLayouts(A,&cong));
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     PetscCall(VecGetArrayRead(x, &xx));
792     PetscCall(VecGetArray(b, &bb));
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     PetscCall(VecRestoreArrayRead(x, &xx));
795     PetscCall(VecRestoreArray(b, &bb));
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
803     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
804   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
824     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
834     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
835   }
836   PetscCall(PetscFree(lrows));
837   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
838   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscMPIInt       n = A->rmap->n;
851   PetscInt          i,j,r,m,len = 0;
852   PetscInt          *lrows,*owners = A->rmap->range;
853   PetscMPIInt       p = 0;
854   PetscSFNode       *rrows;
855   PetscSF           sf;
856   const PetscScalar *xx;
857   PetscScalar       *bb,*mask,*aij_a;
858   Vec               xmask,lmask;
859   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
860   const PetscInt    *aj, *ii,*ridx;
861   PetscScalar       *aa;
862 
863   PetscFunctionBegin;
864   /* Create SF where leaves are input rows and roots are owned rows */
865   PetscCall(PetscMalloc1(n, &lrows));
866   for (r = 0; r < n; ++r) lrows[r] = -1;
867   PetscCall(PetscMalloc1(N, &rrows));
868   for (r = 0; r < N; ++r) {
869     const PetscInt idx   = rows[r];
870     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
871     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
872       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
873     }
874     rrows[r].rank  = p;
875     rrows[r].index = rows[r] - owners[p];
876   }
877   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
878   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
879   /* Collect flags for rows to be zeroed */
880   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
881   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
882   PetscCall(PetscSFDestroy(&sf));
883   /* Compress and put in row numbers */
884   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
885   /* zero diagonal part of matrix */
886   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
887   /* handle off diagonal part of matrix */
888   PetscCall(MatCreateVecs(A,&xmask,NULL));
889   PetscCall(VecDuplicate(l->lvec,&lmask));
890   PetscCall(VecGetArray(xmask,&bb));
891   for (i=0; i<len; i++) bb[lrows[i]] = 1;
892   PetscCall(VecRestoreArray(xmask,&bb));
893   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
894   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
895   PetscCall(VecDestroy(&xmask));
896   if (x && b) { /* this code is buggy when the row and column layouts don't match */
897     PetscBool cong;
898 
899     PetscCall(MatHasCongruentLayouts(A,&cong));
900     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
901     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
902     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
903     PetscCall(VecGetArrayRead(l->lvec,&xx));
904     PetscCall(VecGetArray(b,&bb));
905   }
906   PetscCall(VecGetArray(lmask,&mask));
907   /* remove zeroed rows of off diagonal matrix */
908   PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
909   ii = aij->i;
910   for (i=0; i<len; i++) {
911     PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
912   }
913   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
914   if (aij->compressedrow.use) {
915     m    = aij->compressedrow.nrows;
916     ii   = aij->compressedrow.i;
917     ridx = aij->compressedrow.rindex;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij_a + ii[i];
922 
923       for (j=0; j<n; j++) {
924         if (PetscAbsScalar(mask[*aj])) {
925           if (b) bb[*ridx] -= *aa*xx[*aj];
926           *aa = 0.0;
927         }
928         aa++;
929         aj++;
930       }
931       ridx++;
932     }
933   } else { /* do not use compressed row format */
934     m = l->B->rmap->n;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij_a + ii[i];
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[i] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947     }
948   }
949   if (x && b) {
950     PetscCall(VecRestoreArray(b,&bb));
951     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
952   }
953   PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
954   PetscCall(VecRestoreArray(lmask,&mask));
955   PetscCall(VecDestroy(&lmask));
956   PetscCall(PetscFree(lrows));
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscInt       nt;
970   VecScatter     Mvctx = a->Mvctx;
971 
972   PetscFunctionBegin;
973   PetscCall(VecGetLocalSize(xx,&nt));
974   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
975   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); /* start communicating the ghost values of xx */
976   PetscCall((*a->A->ops->mult)(a->A,xx,yy));                                  /* diagonal-block product, overlapped with the scatter */
977   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));   /* finish receiving the ghost values */
978   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));                       /* add the off-diagonal-block contribution */
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985 
986   PetscFunctionBegin;
987   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
992 {
993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
994   VecScatter     Mvctx = a->Mvctx;
995 
996   PetscFunctionBegin;
997   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
998   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
999   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1000   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007 
1008   PetscFunctionBegin;
1009   /* do nondiagonal part */
1010   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1011   /* do local part */
1012   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1013   /* add partial results together */
1014   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1015   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1020 {
1021   MPI_Comm       comm;
1022   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1023   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1024   IS             Me,Notme;
1025   PetscInt       M,N,first,last,*notme,i;
1026   PetscBool      lf;
1027   PetscMPIInt    size;
1028 
1029   PetscFunctionBegin;
1030   /* Easy test: symmetric diagonal block */
1031   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1032   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
1033   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1034   if (!*f) PetscFunctionReturn(0);
1035   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1036   PetscCallMPI(MPI_Comm_size(comm,&size));
1037   if (size == 1) PetscFunctionReturn(0);
1038 
1039   /* Hard test: off-diagonal block. This requires a call to MatCreateSubMatrices(). */
1040   PetscCall(MatGetSize(Amat,&M,&N));
1041   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
1042   PetscCall(PetscMalloc1(N-last+first,&notme));
1043   for (i=0; i<first; i++) notme[i] = i;
1044   for (i=last; i<M; i++) notme[i-last+first] = i;
1045   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
1046   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
1047   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
1048   Aoff = Aoffs[0];
1049   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
1050   Boff = Boffs[0];
1051   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
1052   PetscCall(MatDestroyMatrices(1,&Aoffs));
1053   PetscCall(MatDestroyMatrices(1,&Boffs));
1054   PetscCall(ISDestroy(&Me));
1055   PetscCall(ISDestroy(&Notme));
1056   PetscCall(PetscFree(notme));
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1061 {
1062   PetscFunctionBegin;
1063   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070 
1071   PetscFunctionBegin;
1072   /* do nondiagonal part */
1073   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1074   /* do local part */
1075   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1076   /* add partial results together */
1077   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1078   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089 
1090   PetscFunctionBegin;
1091   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1092   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1093   PetscCall(MatGetDiagonal(a->A,v));
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100 
1101   PetscFunctionBegin;
1102   PetscCall(MatScale(a->A,aa));
1103   PetscCall(MatScale(a->B,aa));
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1109 {
1110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1111 
1112   PetscFunctionBegin;
1113   PetscCall(PetscSFDestroy(&aij->coo_sf));
1114   PetscCall(PetscFree(aij->Aperm1));
1115   PetscCall(PetscFree(aij->Bperm1));
1116   PetscCall(PetscFree(aij->Ajmap1));
1117   PetscCall(PetscFree(aij->Bjmap1));
1118 
1119   PetscCall(PetscFree(aij->Aimap2));
1120   PetscCall(PetscFree(aij->Bimap2));
1121   PetscCall(PetscFree(aij->Aperm2));
1122   PetscCall(PetscFree(aij->Bperm2));
1123   PetscCall(PetscFree(aij->Ajmap2));
1124   PetscCall(PetscFree(aij->Bjmap2));
1125 
1126   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1127   PetscCall(PetscFree(aij->Cperm1));
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1132 {
1133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1134 
1135   PetscFunctionBegin;
1136 #if defined(PETSC_USE_LOG)
1137   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1138 #endif
1139   PetscCall(MatStashDestroy_Private(&mat->stash));
1140   PetscCall(VecDestroy(&aij->diag));
1141   PetscCall(MatDestroy(&aij->A));
1142   PetscCall(MatDestroy(&aij->B));
1143 #if defined(PETSC_USE_CTABLE)
1144   PetscCall(PetscTableDestroy(&aij->colmap));
1145 #else
1146   PetscCall(PetscFree(aij->colmap));
1147 #endif
1148   PetscCall(PetscFree(aij->garray));
1149   PetscCall(VecDestroy(&aij->lvec));
1150   PetscCall(VecScatterDestroy(&aij->Mvctx));
1151   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1152   PetscCall(PetscFree(aij->ld));
1153 
1154   /* Free COO */
1155   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1156 
1157   PetscCall(PetscFree(mat->data));
1158 
1159   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1160   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1161 
1162   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1172 #if defined(PETSC_HAVE_CUDA)
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1174 #endif
1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1177 #endif
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1181 #endif
1182 #if defined(PETSC_HAVE_SCALAPACK)
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1184 #endif
1185 #if defined(PETSC_HAVE_HYPRE)
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1188 #endif
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1195 #if defined(PETSC_HAVE_MKL_SPARSE)
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1197 #endif
1198   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1202   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1209   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1210   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa,*ba;
1213   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1214   PetscInt          *rowlens;
1215   PetscInt          *colidxs;
1216   PetscScalar       *matvals;
1217 
1218   PetscFunctionBegin;
1219   PetscCall(PetscViewerSetUp(viewer));
1220 
1221   M  = mat->rmap->N;
1222   N  = mat->cmap->N;
1223   m  = mat->rmap->n;
1224   rs = mat->rmap->rstart;
1225   cs = mat->cmap->rstart;
1226   nz = A->nz + B->nz;
1227 
1228   /* write matrix header */
1229   header[0] = MAT_FILE_CLASSID;
1230   header[1] = M; header[2] = N; header[3] = nz;
1231   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1232   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1233 
1234   /* fill in and store row lengths  */
1235   PetscCall(PetscMalloc1(m,&rowlens));
1236   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1238   PetscCall(PetscFree(rowlens));
1239 
1240   /* fill in and store column indices */
1241   PetscCall(PetscMalloc1(nz,&colidxs));
1242   for (cnt=0, i=0; i<m; i++) {
1243     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       colidxs[cnt++] = garray[B->j[jb]];
1246     }
1247     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1248       colidxs[cnt++] = A->j[ja] + cs;
1249     for (; jb<B->i[i+1]; jb++)
1250       colidxs[cnt++] = garray[B->j[jb]];
1251   }
1252   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1254   PetscCall(PetscFree(colidxs));
1255 
1256   /* fill in and store nonzero values */
1257   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1258   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1259   PetscCall(PetscMalloc1(nz,&matvals));
1260   for (cnt=0, i=0; i<m; i++) {
1261     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1262       if (garray[B->j[jb]] > cs) break;
1263       matvals[cnt++] = ba[jb];
1264     }
1265     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1266       matvals[cnt++] = aa[ja];
1267     for (; jb<B->i[i+1]; jb++)
1268       matvals[cnt++] = ba[jb];
1269   }
1270   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1272   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1273   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1274   PetscCall(PetscFree(matvals));
1275 
1276   /* write block size option to the viewer's .info file */
1277   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #include <petscdraw.h>
1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1283 {
1284   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1285   PetscMPIInt       rank = aij->rank,size = aij->size;
1286   PetscBool         isdraw,iascii,isbinary;
1287   PetscViewer       sviewer;
1288   PetscViewerFormat format;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1292   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1293   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1294   if (iascii) {
1295     PetscCall(PetscViewerGetFormat(viewer,&format));
1296     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1297       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1298       PetscCall(PetscMalloc1(size,&nz));
1299       PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
1300       for (i=0; i<(PetscInt)size; i++) {
1301         nmax = PetscMax(nmax,nz[i]);
1302         nmin = PetscMin(nmin,nz[i]);
1303         navg += nz[i];
1304       }
1305       PetscCall(PetscFree(nz));
1306       navg = navg/size;
1307       PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
1308       PetscFunctionReturn(0);
1309     }
1310     PetscCall(PetscViewerGetFormat(viewer,&format));
1311     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1312       MatInfo   info;
1313       PetscInt *inodes=NULL;
1314 
1315       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
1316       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
1317       PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
1318       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1319       if (!inodes) {
1320         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1321                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1322       } else {
1323         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1324                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1325       }
1326       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
1327       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1328       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
1329       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1330       PetscCall(PetscViewerFlush(viewer));
1331       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1332       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
1333       PetscCall(VecScatterView(aij->Mvctx,viewer));
1334       PetscFunctionReturn(0);
1335     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1336       PetscInt inodecount,inodelimit,*inodes;
1337       PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
1338       if (inodes) {
1339         PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1340       } else {
1341         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1342       }
1343       PetscFunctionReturn(0);
1344     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1345       PetscFunctionReturn(0);
1346     }
1347   } else if (isbinary) {
1348     if (size == 1) {
1349       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1350       PetscCall(MatView(aij->A,viewer));
1351     } else {
1352       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
1353     }
1354     PetscFunctionReturn(0);
1355   } else if (iascii && size == 1) {
1356     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1357     PetscCall(MatView(aij->A,viewer));
1358     PetscFunctionReturn(0);
1359   } else if (isdraw) {
1360     PetscDraw draw;
1361     PetscBool isnull;
1362     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1363     PetscCall(PetscDrawIsNull(draw,&isnull));
1364     if (isnull) PetscFunctionReturn(0);
1365   }
1366 
1367   { /* assemble the entire matrix onto first processor */
1368     Mat A = NULL, Av;
1369     IS  isrow,iscol;
1370 
1371     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1372     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1373     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
1374     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
1375 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1376 /*
1377     Mat *AA, A = NULL, Av;
1378     IS  isrow,iscol;
1379 
1380     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1381     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1382     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1383     if (rank == 0) {
1384        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1385        A    = AA[0];
1386        Av   = AA[0];
1387     }
1388     PetscCall(MatDestroySubMatrices(1,&AA));
1389 */
1390     PetscCall(ISDestroy(&iscol));
1391     PetscCall(ISDestroy(&isrow));
1392     /*
1393        Everyone has to call to draw the matrix since the graphics waits are
1394        synchronized across all processors that share the PetscDraw object
1395     */
1396     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1397     if (rank == 0) {
1398       if (((PetscObject)mat)->name) {
1399         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
1400       }
1401       PetscCall(MatView_SeqAIJ(Av,sviewer));
1402     }
1403     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1404     PetscCall(PetscViewerFlush(viewer));
1405     PetscCall(MatDestroy(&A));
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1411 {
1412   PetscBool      iascii,isdraw,issocket,isbinary;
1413 
1414   PetscFunctionBegin;
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1419   if (iascii || isdraw || isbinary || issocket) {
1420     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1426 {
1427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1428   Vec            bb1 = NULL;
1429   PetscBool      hasop;
1430 
1431   PetscFunctionBegin;
1432   if (flag == SOR_APPLY_UPPER) {
1433     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1434     PetscFunctionReturn(0);
1435   }
1436 
1437   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1438     PetscCall(VecDuplicate(bb,&bb1));
1439   }
1440 
1441   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1444       its--;
1445     }
1446 
1447     while (its--) {
1448       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1449       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       PetscCall(VecScale(mat->lvec,-1.0));
1453       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1454 
1455       /* local sweep */
1456       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
1457     }
1458   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1461       its--;
1462     }
1463     while (its--) {
1464       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1465       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       PetscCall(VecScale(mat->lvec,-1.0));
1469       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1470 
1471       /* local sweep */
1472       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
1473     }
1474   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1477       its--;
1478     }
1479     while (its--) {
1480       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1481       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1482 
1483       /* update rhs: bb1 = bb - B*x */
1484       PetscCall(VecScale(mat->lvec,-1.0));
1485       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1486 
1487       /* local sweep */
1488       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
1489     }
1490   } else if (flag & SOR_EISENSTAT) {
1491     Vec xx1;
1492 
1493     PetscCall(VecDuplicate(bb,&xx1));
1494     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1495 
1496     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1497     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1498     if (!mat->diag) {
1499       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
1500       PetscCall(MatGetDiagonal(matin,mat->diag));
1501     }
1502     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1503     if (hasop) {
1504       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1505     } else {
1506       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1507     }
1508     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1509 
1510     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1511 
1512     /* local sweep */
1513     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
1514     PetscCall(VecAXPY(xx,1.0,xx1));
1515     PetscCall(VecDestroy(&xx1));
1516   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1517 
1518   PetscCall(VecDestroy(&bb1));
1519 
1520   matin->factorerrortype = mat->A->factorerrortype;
1521   PetscFunctionReturn(0);
1522 }
1523 
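/*
   Note on MatSOR_MPIAIJ() above (a summary of the update, not new behavior): with the MPIAIJ split
   into the diagonal block A and off-diagonal block B, each outer iteration scatters the ghost
   values of x into lvec and relaxes only the local block against a modified right-hand side,

       bb1 = bb - B*lvec,   followed by one local sweep of A with (omega, fshift, lits).

   This is why only the SOR_LOCAL_* variants (and Eisenstat) are supported; a true parallel SOR
   raises PETSC_ERR_SUP.
*/
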
1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1525 {
1526   Mat            aA,aB,Aperm;
1527   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1528   PetscScalar    *aa,*ba;
1529   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1530   PetscSF        rowsf,sf;
1531   IS             parcolp = NULL;
1532   PetscBool      done;
1533 
1534   PetscFunctionBegin;
1535   PetscCall(MatGetLocalSize(A,&m,&n));
1536   PetscCall(ISGetIndices(rowp,&rwant));
1537   PetscCall(ISGetIndices(colp,&cwant));
1538   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
1539 
1540   /* Invert row permutation to find out where my rows should go */
1541   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
1542   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
1543   PetscCall(PetscSFSetFromOptions(rowsf));
1544   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1545   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1546   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1547 
1548   /* Invert column permutation to find out where my columns should go */
1549   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1550   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
1551   PetscCall(PetscSFSetFromOptions(sf));
1552   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1553   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1554   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1555   PetscCall(PetscSFDestroy(&sf));
1556 
1557   PetscCall(ISRestoreIndices(rowp,&rwant));
1558   PetscCall(ISRestoreIndices(colp,&cwant));
1559   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
1560 
1561   /* Find out where my gcols should go */
1562   PetscCall(MatGetSize(aB,NULL,&ng));
1563   PetscCall(PetscMalloc1(ng,&gcdest));
1564   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1565   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
1566   PetscCall(PetscSFSetFromOptions(sf));
1567   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&sf));
1570 
1571   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
1572   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1573   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1574   for (i=0; i<m; i++) {
1575     PetscInt    row = rdest[i];
1576     PetscMPIInt rowner;
1577     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
1578     for (j=ai[i]; j<ai[i+1]; j++) {
1579       PetscInt    col = cdest[aj[j]];
1580       PetscMPIInt cowner;
1581       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
1582       if (rowner == cowner) dnnz[i]++;
1583       else onnz[i]++;
1584     }
1585     for (j=bi[i]; j<bi[i+1]; j++) {
1586       PetscInt    col = gcdest[bj[j]];
1587       PetscMPIInt cowner;
1588       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
1589       if (rowner == cowner) dnnz[i]++;
1590       else onnz[i]++;
1591     }
1592   }
1593   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1594   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1595   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1596   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1597   PetscCall(PetscSFDestroy(&rowsf));
1598 
1599   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
1600   PetscCall(MatSeqAIJGetArray(aA,&aa));
1601   PetscCall(MatSeqAIJGetArray(aB,&ba));
1602   for (i=0; i<m; i++) {
1603     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1604     PetscInt j0,rowlen;
1605     rowlen = ai[i+1] - ai[i];
1606     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert the values in batches of at most m */
1607       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1608       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1609     }
1610     rowlen = bi[i+1] - bi[i];
1611     for (j0=j=0; j<rowlen; j0=j) {
1612       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1613       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1614     }
1615   }
1616   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
1617   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
1618   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1619   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1620   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
1621   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
1622   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
1623   PetscCall(PetscFree3(work,rdest,cdest));
1624   PetscCall(PetscFree(gcdest));
1625   if (parcolp) PetscCall(ISDestroy(&colp));
1626   *B = Aperm;
1627   PetscFunctionReturn(0);
1628 }
1629 
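/*
   Example usage (a minimal sketch; rowp and colp are hypothetical parallel index sets describing
   the desired new ordering, e.g. built with ISCreateGeneral()):

     Mat Aperm;
     PetscCall(MatPermute(A,rowp,colp,&Aperm));   // dispatches to MatPermute_MPIAIJ() for MATMPIAIJ
     ...
     PetscCall(MatDestroy(&Aperm));
*/
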
1630 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1631 {
1632   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1633 
1634   PetscFunctionBegin;
1635   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1636   if (ghosts) *ghosts = aij->garray;
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1641 {
1642   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1643   Mat            A    = mat->A,B = mat->B;
1644   PetscLogDouble isend[5],irecv[5];
1645 
1646   PetscFunctionBegin;
1647   info->block_size = 1.0;
1648   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1649 
1650   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1651   isend[3] = info->memory;  isend[4] = info->mallocs;
1652 
1653   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1654 
1655   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;  isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687 {
1688   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1689 
1690   PetscFunctionBegin;
1691   switch (op) {
1692   case MAT_NEW_NONZERO_LOCATIONS:
1693   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695   case MAT_KEEP_NONZERO_PATTERN:
1696   case MAT_NEW_NONZERO_LOCATION_ERR:
1697   case MAT_USE_INODES:
1698   case MAT_IGNORE_ZERO_ENTRIES:
1699   case MAT_FORM_EXPLICIT_TRANSPOSE:
1700     MatCheckPreallocated(A,1);
1701     PetscCall(MatSetOption(a->A,op,flg));
1702     PetscCall(MatSetOption(a->B,op,flg));
1703     break;
1704   case MAT_ROW_ORIENTED:
1705     MatCheckPreallocated(A,1);
1706     a->roworiented = flg;
1707 
1708     PetscCall(MatSetOption(a->A,op,flg));
1709     PetscCall(MatSetOption(a->B,op,flg));
1710     break;
1711   case MAT_FORCE_DIAGONAL_ENTRIES:
1712   case MAT_SORTED_FULL:
1713     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1714     break;
1715   case MAT_IGNORE_OFF_PROC_ENTRIES:
1716     a->donotstash = flg;
1717     break;
1718   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1719   case MAT_SPD:
1720   case MAT_SYMMETRIC:
1721   case MAT_STRUCTURALLY_SYMMETRIC:
1722   case MAT_HERMITIAN:
1723   case MAT_SYMMETRY_ETERNAL:
1724     break;
1725   case MAT_SUBMAT_SINGLEIS:
1726     A->submat_singleis = flg;
1727     break;
1728   case MAT_STRUCTURE_ONLY:
1729     /* The option is handled directly by MatSetOption() */
1730     break;
1731   default:
1732     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1733   }
1734   PetscFunctionReturn(0);
1735 }
1736 
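/*
   Example usage (a minimal sketch): options are forwarded to the diagonal (a->A) and off-diagonal
   (a->B) blocks where that makes sense; a few are handled here directly, e.g.

     PetscCall(MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE));   // sets a->donotstash above
*/
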
1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1738 {
1739   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1740   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1741   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1742   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1743   PetscInt       *cmap,*idx_p;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1747   mat->getrowactive = PETSC_TRUE;
1748 
1749   if (!mat->rowvalues && (idx || v)) {
1750     /*
1751         allocate enough space to hold information from the longest row.
1752     */
1753     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1754     PetscInt   max = 1,tmp;
1755     for (i=0; i<matin->rmap->n; i++) {
1756       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1757       if (max < tmp) max = tmp;
1758     }
1759     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
1760   }
1761 
1762   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1763   lrow = row - rstart;
1764 
1765   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1766   if (!v)   {pvA = NULL; pvB = NULL;}
1767   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1768   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1769   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1770   nztot = nzA + nzB;
1771 
1772   cmap = mat->garray;
1773   if (v  || idx) {
1774     if (nztot) {
1775       /* Sort by increasing column numbers, assuming A and B already sorted */
1776       PetscInt imark = -1;
1777       if (v) {
1778         *v = v_p = mat->rowvalues;
1779         for (i=0; i<nzB; i++) {
1780           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1781           else break;
1782         }
1783         imark = i;
1784         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1785         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1786       }
1787       if (idx) {
1788         *idx = idx_p = mat->rowindices;
1789         if (imark > -1) {
1790           for (i=0; i<imark; i++) {
1791             idx_p[i] = cmap[cworkB[i]];
1792           }
1793         } else {
1794           for (i=0; i<nzB; i++) {
1795             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1796             else break;
1797           }
1798           imark = i;
1799         }
1800         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1801         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1802       }
1803     } else {
1804       if (idx) *idx = NULL;
1805       if (v)   *v   = NULL;
1806     }
1807   }
1808   *nz  = nztot;
1809   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1810   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
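/*
   Example usage of the row access above (a minimal sketch; only locally owned rows may be queried,
   as enforced by the PetscCheck() in MatGetRow_MPIAIJ()):

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
     for (row=rstart; row<rend; row++) {
       PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
       // ... use cols[0..ncols-1] (global column indices) and vals ...
       PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
     }
*/
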
1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1825 {
1826   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1827   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1828   PetscInt        i,j,cstart = mat->cmap->rstart;
1829   PetscReal       sum = 0.0;
1830   const MatScalar *v,*amata,*bmata;
1831 
1832   PetscFunctionBegin;
1833   if (aij->size == 1) {
1834     PetscCall(MatNorm(aij->A,type,norm));
1835   } else {
1836     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
1837     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
1838     if (type == NORM_FROBENIUS) {
1839       v = amata;
1840       for (i=0; i<amat->nz; i++) {
1841         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1842       }
1843       v = bmata;
1844       for (i=0; i<bmat->nz; i++) {
1845         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1846       }
1847       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1848       *norm = PetscSqrtReal(*norm);
1849       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
1850     } else if (type == NORM_1) { /* max column norm */
1851       PetscReal *tmp,*tmp2;
1852       PetscInt  *jj,*garray = aij->garray;
1853       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
1854       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
1855       *norm = 0.0;
1856       v     = amata; jj = amat->j;
1857       for (j=0; j<amat->nz; j++) {
1858         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1859       }
1860       v = bmata; jj = bmat->j;
1861       for (j=0; j<bmat->nz; j++) {
1862         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1863       }
1864       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1865       for (j=0; j<mat->cmap->N; j++) {
1866         if (tmp2[j] > *norm) *norm = tmp2[j];
1867       }
1868       PetscCall(PetscFree(tmp));
1869       PetscCall(PetscFree(tmp2));
1870       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1871     } else if (type == NORM_INFINITY) { /* max row norm */
1872       PetscReal ntemp = 0.0;
1873       for (j=0; j<aij->A->rmap->n; j++) {
1874         v   = amata + amat->i[j];
1875         sum = 0.0;
1876         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1877           sum += PetscAbsScalar(*v); v++;
1878         }
1879         v = bmata + bmat->i[j];
1880         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v); v++;
1882         }
1883         if (sum > ntemp) ntemp = sum;
1884       }
1885       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
1886       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1887     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1888     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
1889     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
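/*
   The norms computed above (for reference; this mirrors the branches, no new behavior):
     NORM_FROBENIUS : ||A||_F   = sqrt(sum_ij |a_ij|^2)            -- local sums over A and B parts, MPI sum, sqrt
     NORM_1         : ||A||_1   = max_j sum_i |a_ij| (column sums) -- accumulate per global column, MPI sum, take max
     NORM_INFINITY  : ||A||_inf = max_i sum_j |a_ij| (row sums)    -- rows are local, MPI max
   NORM_2 is not supported for MPIAIJ and raises PETSC_ERR_SUP.
*/
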
1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1895 {
1896   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1897   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1898   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1899   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1900   Mat             B,A_diag,*B_diag;
1901   const MatScalar *pbv,*bv;
1902 
1903   PetscFunctionBegin;
1904   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1905   ai = Aloc->i; aj = Aloc->j;
1906   bi = Bloc->i; bj = Bloc->j;
1907   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1908     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1909     PetscSFNode          *oloc;
1910     PETSC_UNUSED PetscSF sf;
1911 
1912     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
1913     /* compute d_nnz for preallocation */
1914     PetscCall(PetscArrayzero(d_nnz,na));
1915     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1916     /* compute local off-diagonal contributions */
1917     PetscCall(PetscArrayzero(g_nnz,nb));
1918     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1919     /* map those to global */
1920     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1921     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
1922     PetscCall(PetscSFSetFromOptions(sf));
1923     PetscCall(PetscArrayzero(o_nnz,na));
1924     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1925     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1926     PetscCall(PetscSFDestroy(&sf));
1927 
1928     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1929     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1930     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1931     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1932     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
1933     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1934   } else {
1935     B    = *matout;
1936     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1937   }
1938 
1939   b           = (Mat_MPIAIJ*)B->data;
1940   A_diag      = a->A;
1941   B_diag      = &b->A;
1942   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1943   A_diag_ncol = A_diag->cmap->N;
1944   B_diag_ilen = sub_B_diag->ilen;
1945   B_diag_i    = sub_B_diag->i;
1946 
1947   /* Set ilen for diagonal of B */
1948   for (i=0; i<A_diag_ncol; i++) {
1949     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1950   }
1951 
1952   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1953      very quickly (i.e., without using MatSetValues()) because all writes are local. */
1954   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1955 
1956   /* copy over the B part */
1957   PetscCall(PetscMalloc1(bi[mb],&cols));
1958   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1959   pbv  = bv;
1960   row  = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
1966     row++;
1967     pbv += ncol; cols_tmp += ncol;
1968   }
1969   PetscCall(PetscFree(cols));
1970   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1971 
1972   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1973   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1974   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1975     *matout = B;
1976   } else {
1977     PetscCall(MatHeaderMerge(A,&B));
1978   }
1979   PetscFunctionReturn(0);
1980 }
1981 
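/*
   Example usage (a minimal sketch):

     Mat At;
     PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));   // At = A^T as a new MATMPIAIJ
     ...
     PetscCall(MatDestroy(&At));

   The routine above also handles MAT_REUSE_MATRIX (same nonzero pattern) and the in-place case,
   where the result replaces A via MatHeaderMerge().
*/
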
1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1983 {
1984   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1985   Mat            a    = aij->A,b = aij->B;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1990   if (rr) {
1991     PetscCall(VecGetLocalSize(rr,&s1));
1992     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1995   }
1996   if (ll) {
1997     PetscCall(VecGetLocalSize(ll,&s1));
1998     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2000   }
2001   /* scale the diagonal block */
2002   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2007     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
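/*
   What the scaling above computes (for reference): with left vector ll and right vector rr,

       A <- diag(ll) * A * diag(rr)

   applied to the diagonal block directly and to the off-diagonal block using the scattered ghost
   values of rr. Either vector may be NULL to skip that side, e.g. (r is a hypothetical right vector)

     PetscCall(MatDiagonalScale(A,NULL,r));   // right scaling only
*/
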
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015 
2016   PetscFunctionBegin;
2017   PetscCall(MatSetUnfactored(a->A));
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2022 {
2023   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2024   Mat            a,b,c,d;
2025   PetscBool      flg;
2026 
2027   PetscFunctionBegin;
2028   a = matA->A; b = matA->B;
2029   c = matB->A; d = matB->B;
2030 
2031   PetscCall(MatEqual(a,c,&flg));
2032   if (flg) {
2033     PetscCall(MatEqual(b,d,&flg));
2034   }
2035   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2040 {
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2043 
2044   PetscFunctionBegin;
2045   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2046   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2047     /* because of the column compression in the off-processor part of the matrix a->B,
2048        the number of columns in a->B and b->B may be different, hence we cannot call
2049        the MatCopy() directly on the two parts. If need be, we can provide a more
2050        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2051        then copying the submatrices */
2052     PetscCall(MatCopy_Basic(A,B,str));
2053   } else {
2054     PetscCall(MatCopy(a->A,b->A,str));
2055     PetscCall(MatCopy(a->B,b->B,str));
2056   }
2057   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2062 {
2063   PetscFunctionBegin;
2064   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
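/*
   Small worked example of the merge count above (illustration only): if a row of X has global
   columns {1,5} and the same row of Y has {2,5,9}, the two sorted lists merge to {1,2,5,9}, so
   nnz[i] = 4; the shared column 5 is counted once via the "Skip duplicate" branch.
*/
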
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscInt       m = Y->rmap->N;
2097   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2098   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2099 
2100   PetscFunctionBegin;
2101   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2106 {
2107   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2108 
2109   PetscFunctionBegin;
2110   if (str == SAME_NONZERO_PATTERN) {
2111     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2112     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2113   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2114     PetscCall(MatAXPY_Basic(Y,a,X,str));
2115   } else {
2116     Mat      B;
2117     PetscInt *nnz_d,*nnz_o;
2118 
2119     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2120     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2121     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2122     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2123     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2124     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2125     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2126     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2127     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2128     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2129     PetscCall(MatHeaderMerge(Y,&B));
2130     PetscCall(PetscFree(nnz_d));
2131     PetscCall(PetscFree(nnz_o));
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135 
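/*
   Example usage (a minimal sketch): Y <- a*X + Y. The cheaper branches above are taken when the
   caller can guarantee a pattern relation between X and Y:

     PetscCall(MatAXPY(Y,a,X,SAME_NONZERO_PATTERN));       // per-block MatAXPY on the A and B parts
     PetscCall(MatAXPY(Y,a,X,SUBSET_NONZERO_PATTERN));     // nonzeros of X are a subset of Y's
     PetscCall(MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN));  // general case: a new preallocation is computed
*/
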
2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137 
2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139 {
2140   PetscFunctionBegin;
2141   if (PetscDefined(USE_COMPLEX)) {
2142     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2143 
2144     PetscCall(MatConjugate_SeqAIJ(aij->A));
2145     PetscCall(MatConjugate_SeqAIJ(aij->B));
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatRealPart(a->A));
2156   PetscCall(MatRealPart(a->B));
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161 {
2162   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(MatImaginaryPart(a->A));
2166   PetscCall(MatImaginaryPart(a->B));
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2173   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2174   PetscScalar       *va,*vv;
2175   Vec               vB,vA;
2176   const PetscScalar *vb;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2180   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2181 
2182   PetscCall(VecGetArrayWrite(vA,&va));
2183   if (idx) {
2184     for (i=0; i<m; i++) {
2185       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186     }
2187   }
2188 
2189   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2190   PetscCall(PetscMalloc1(m,&idxb));
2191   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2192 
2193   PetscCall(VecGetArrayWrite(v,&vv));
2194   PetscCall(VecGetArrayRead(vB,&vb));
2195   for (i=0; i<m; i++) {
2196     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197       vv[i] = vb[i];
2198       if (idx) idx[i] = a->garray[idxb[i]];
2199     } else {
2200       vv[i] = va[i];
2201       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2202         idx[i] = a->garray[idxb[i]];
2203     }
2204   }
2205   PetscCall(VecRestoreArrayWrite(v,&vv));
2206   PetscCall(VecRestoreArrayWrite(vA,&va));
2207   PetscCall(VecRestoreArrayRead(vB,&vb));
2208   PetscCall(PetscFree(idxb));
2209   PetscCall(VecDestroy(&vA));
2210   PetscCall(VecDestroy(&vB));
2211   PetscFunctionReturn(0);
2212 }
2213 
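/*
   Example usage (a minimal sketch; v conforms to the row layout of A, and idx, if provided,
   receives the global column index of the selected entry in each local row):

     Vec      v;
     PetscInt m,*idx;
     PetscCall(MatGetLocalSize(A,&m,NULL));
     PetscCall(MatCreateVecs(A,NULL,&v));   // left vector: one entry per local row
     PetscCall(PetscMalloc1(m,&idx));
     PetscCall(MatGetRowMaxAbs(A,v,idx));
     ...
     PetscCall(PetscFree(idx));
     PetscCall(VecDestroy(&v));
*/
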
2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2215 {
2216   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2217   PetscInt          m = A->rmap->n,n = A->cmap->n;
2218   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2219   PetscInt          *cmap  = mat->garray;
2220   PetscInt          *diagIdx, *offdiagIdx;
2221   Vec               diagV, offdiagV;
2222   PetscScalar       *a, *diagA, *offdiagA;
2223   const PetscScalar *ba,*bav;
2224   PetscInt          r,j,col,ncols,*bi,*bj;
2225   Mat               B = mat->B;
2226   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2227 
2228   PetscFunctionBegin;
2229   /* When one process holds the entire matrix A and the other processes have no entries */
2230   if (A->cmap->N == n) {
2231     PetscCall(VecGetArrayWrite(v,&diagA));
2232     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2233     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
2234     PetscCall(VecDestroy(&diagV));
2235     PetscCall(VecRestoreArrayWrite(v,&diagA));
2236     PetscFunctionReturn(0);
2237   } else if (n == 0) {
2238     if (m) {
2239       PetscCall(VecGetArrayWrite(v,&a));
2240       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2241       PetscCall(VecRestoreArrayWrite(v,&a));
2242     }
2243     PetscFunctionReturn(0);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
2252   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2253   ba   = bav;
2254   bi   = b->i;
2255   bj   = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r+1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2261     } else { /* Brow is sparse so we already KNOW the minimum abs value is 0.0 (an implicit zero) */
2262       offdiagA[r] = 0.0;
2263 
2264       /* Find first hole in the cmap */
2265       for (j=0; j<ncols; j++) {
2266         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2267         if (col > j && j < cstart) {
2268           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2269           break;
2270         } else if (col > j + n && j >= cstart) {
2271           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2272           break;
2273         }
2274       }
2275       if (j == ncols && ncols < A->cmap->N - n) {
2276         /* a hole is outside compressed Bcols */
2277         if (ncols == 0) {
2278           if (cstart) {
2279             offdiagIdx[r] = 0;
2280           } else offdiagIdx[r] = cend;
2281         } else { /* ncols > 0 */
2282           offdiagIdx[r] = cmap[ncols-1] + 1;
2283           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2284         }
2285       }
2286     }
2287 
2288     for (j=0; j<ncols; j++) {
2289       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2290       ba++; bj++;
2291     }
2292   }
2293 
2294   PetscCall(VecGetArrayWrite(v, &a));
2295   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2296   for (r = 0; r < m; ++r) {
2297     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2298       a[r]   = diagA[r];
2299       if (idx) idx[r] = cstart + diagIdx[r];
2300     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2301       a[r] = diagA[r];
2302       if (idx) {
2303         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2304           idx[r] = cstart + diagIdx[r];
2305         } else idx[r] = offdiagIdx[r];
2306       }
2307     } else {
2308       a[r]   = offdiagA[r];
2309       if (idx) idx[r] = offdiagIdx[r];
2310     }
2311   }
2312   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2313   PetscCall(VecRestoreArrayWrite(v, &a));
2314   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2315   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2316   PetscCall(VecDestroy(&diagV));
2317   PetscCall(VecDestroy(&offdiagV));
2318   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2323 {
2324   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2325   PetscInt          m = A->rmap->n,n = A->cmap->n;
2326   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2327   PetscInt          *cmap  = mat->garray;
2328   PetscInt          *diagIdx, *offdiagIdx;
2329   Vec               diagV, offdiagV;
2330   PetscScalar       *a, *diagA, *offdiagA;
2331   const PetscScalar *ba,*bav;
2332   PetscInt          r,j,col,ncols,*bi,*bj;
2333   Mat               B = mat->B;
2334   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2335 
2336   PetscFunctionBegin;
2337   /* When one process holds the entire matrix A and the other processes have no entries */
2338   if (A->cmap->N == n) {
2339     PetscCall(VecGetArrayWrite(v,&diagA));
2340     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2341     PetscCall(MatGetRowMin(mat->A,diagV,idx));
2342     PetscCall(VecDestroy(&diagV));
2343     PetscCall(VecRestoreArrayWrite(v,&diagA));
2344     PetscFunctionReturn(0);
2345   } else if (n == 0) {
2346     if (m) {
2347       PetscCall(VecGetArrayWrite(v,&a));
2348       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2349       PetscCall(VecRestoreArrayWrite(v,&a));
2350     }
2351     PetscFunctionReturn(0);
2352   }
2353 
2354   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
2355   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2356   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2357   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2358 
2359   /* Get offdiagIdx[] for implicit 0.0 */
2360   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2361   ba   = bav;
2362   bi   = b->i;
2363   bj   = b->j;
2364   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2365   for (r = 0; r < m; r++) {
2366     ncols = bi[r+1] - bi[r];
2367     if (ncols == A->cmap->N - n) { /* Brow is dense */
2368       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2369     } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower (an implicit zero) */
2370       offdiagA[r] = 0.0;
2371 
2372       /* Find first hole in the cmap */
2373       for (j=0; j<ncols; j++) {
2374         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2375         if (col > j && j < cstart) {
2376           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2377           break;
2378         } else if (col > j + n && j >= cstart) {
2379           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2380           break;
2381         }
2382       }
2383       if (j == ncols && ncols < A->cmap->N - n) {
2384         /* a hole is outside compressed Bcols */
2385         if (ncols == 0) {
2386           if (cstart) {
2387             offdiagIdx[r] = 0;
2388           } else offdiagIdx[r] = cend;
2389         } else { /* ncols > 0 */
2390           offdiagIdx[r] = cmap[ncols-1] + 1;
2391           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2392         }
2393       }
2394     }
2395 
2396     for (j=0; j<ncols; j++) {
2397       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2398       ba++; bj++;
2399     }
2400   }
2401 
2402   PetscCall(VecGetArrayWrite(v, &a));
2403   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2404   for (r = 0; r < m; ++r) {
2405     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2406       a[r]   = diagA[r];
2407       if (idx) idx[r] = cstart + diagIdx[r];
2408     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2409       a[r] = diagA[r];
2410       if (idx) {
2411         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2412           idx[r] = cstart + diagIdx[r];
2413         } else idx[r] = offdiagIdx[r];
2414       }
2415     } else {
2416       a[r]   = offdiagA[r];
2417       if (idx) idx[r] = offdiagIdx[r];
2418     }
2419   }
2420   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2421   PetscCall(VecRestoreArrayWrite(v, &a));
2422   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2423   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2424   PetscCall(VecDestroy(&diagV));
2425   PetscCall(VecDestroy(&offdiagV));
2426   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2433   PetscInt          m = A->rmap->n,n = A->cmap->n;
2434   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2435   PetscInt          *cmap  = mat->garray;
2436   PetscInt          *diagIdx, *offdiagIdx;
2437   Vec               diagV, offdiagV;
2438   PetscScalar       *a, *diagA, *offdiagA;
2439   const PetscScalar *ba,*bav;
2440   PetscInt          r,j,col,ncols,*bi,*bj;
2441   Mat               B = mat->B;
2442   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2443 
2444   PetscFunctionBegin;
2445   /* When one process holds the entire matrix A and the other processes have no entries */
2446   if (A->cmap->N == n) {
2447     PetscCall(VecGetArrayWrite(v,&diagA));
2448     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2449     PetscCall(MatGetRowMax(mat->A,diagV,idx));
2450     PetscCall(VecDestroy(&diagV));
2451     PetscCall(VecRestoreArrayWrite(v,&diagA));
2452     PetscFunctionReturn(0);
2453   } else if (n == 0) {
2454     if (m) {
2455       PetscCall(VecGetArrayWrite(v,&a));
2456       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2457       PetscCall(VecRestoreArrayWrite(v,&a));
2458     }
2459     PetscFunctionReturn(0);
2460   }
2461 
2462   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2463   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2464   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2465   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2466 
2467   /* Get offdiagIdx[] for implicit 0.0 */
2468   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2469   ba   = bav;
2470   bi   = b->i;
2471   bj   = b->j;
2472   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2473   for (r = 0; r < m; r++) {
2474     ncols = bi[r+1] - bi[r];
2475     if (ncols == A->cmap->N - n) { /* Brow is dense */
2476       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2477     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2478       offdiagA[r] = 0.0;
2479 
2480       /* Find first hole in the cmap */
2481       for (j=0; j<ncols; j++) {
2482         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2483         if (col > j && j < cstart) {
2484           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2485           break;
2486         } else if (col > j + n && j >= cstart) {
2487           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2488           break;
2489         }
2490       }
2491       if (j == ncols && ncols < A->cmap->N - n) {
2492         /* a hole is outside compressed Bcols */
2493         if (ncols == 0) {
2494           if (cstart) {
2495             offdiagIdx[r] = 0;
2496           } else offdiagIdx[r] = cend;
2497         } else { /* ncols > 0 */
2498           offdiagIdx[r] = cmap[ncols-1] + 1;
2499           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2500         }
2501       }
2502     }
2503 
2504     for (j=0; j<ncols; j++) {
2505       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2506       ba++; bj++;
2507     }
2508   }
2509 
2510   PetscCall(VecGetArrayWrite(v,    &a));
2511   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
2512   for (r = 0; r < m; ++r) {
2513     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2514       a[r] = diagA[r];
2515       if (idx) idx[r] = cstart + diagIdx[r];
2516     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2517       a[r] = diagA[r];
2518       if (idx) {
2519         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2520           idx[r] = cstart + diagIdx[r];
2521         } else idx[r] = offdiagIdx[r];
2522       }
2523     } else {
2524       a[r] = offdiagA[r];
2525       if (idx) idx[r] = offdiagIdx[r];
2526     }
2527   }
2528   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2529   PetscCall(VecRestoreArrayWrite(v,       &a));
2530   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
2531   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
2532   PetscCall(VecDestroy(&diagV));
2533   PetscCall(VecDestroy(&offdiagV));
2534   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2539 {
2540   Mat            *dummy;
2541 
2542   PetscFunctionBegin;
2543   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2544   *newmat = *dummy;
2545   PetscCall(PetscFree(dummy));
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2550 {
2551   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2552 
2553   PetscFunctionBegin;
2554   PetscCall(MatInvertBlockDiagonal(a->A,values));
2555   A->factorerrortype = a->A->factorerrortype;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2560 {
2561   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2562 
2563   PetscFunctionBegin;
2564   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2565   PetscCall(MatSetRandom(aij->A,rctx));
2566   if (x->assembled) {
2567     PetscCall(MatSetRandom(aij->B,rctx));
2568   } else {
2569     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2570   }
2571   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2572   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2577 {
2578   PetscFunctionBegin;
2579   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2580   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*@
2585    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2586 
2587    Collective on Mat
2588 
2589    Input Parameters:
2590 +    A - the matrix
2591 -    sc - PETSC_TRUE to use the scalable algorithm (the default is not to use it)
2592 
2593    Level: advanced
2594 
2595 @*/
2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2597 {
2598   PetscFunctionBegin;
2599   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2600   PetscFunctionReturn(0);
2601 }
2602 
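/*
   Example usage (a minimal sketch; nis, is, and ov are placeholders for the caller's index sets and
   overlap count). This is equivalent to the -mat_increase_overlap_scalable option handled in
   MatSetFromOptions_MPIAIJ() below:

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE));
     PetscCall(MatIncreaseOverlap(A,nis,is,ov));   // now uses MatIncreaseOverlap_MPIAIJ_Scalable()
*/
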
2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2604 {
2605   PetscBool            sc = PETSC_FALSE,flg;
2606 
2607   PetscFunctionBegin;
2608   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2609   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2610   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2611   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2612   PetscOptionsHeadEnd();
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2617 {
2618   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2619   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2620 
2621   PetscFunctionBegin;
2622   if (!Y->preallocated) {
2623     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2624   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A; we just need an estimate for the preallocation. */
2625     PetscInt nonew = aij->nonew;
2626     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2627     aij->nonew = nonew;
2628   }
2629   PetscCall(MatShift_Basic(Y,a));
2630   PetscFunctionReturn(0);
2631 }
2632 
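/*
   For reference, the shift above computes Y <- Y + a*I on the locally owned part of the diagonal;
   a typical call is simply

     PetscCall(MatShift(Y,a));
*/
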
2633 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2634 {
2635   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2636 
2637   PetscFunctionBegin;
2638   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2639   PetscCall(MatMissingDiagonal(a->A,missing,d));
2640   if (d) {
2641     PetscInt rstart;
2642     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2643     *d += rstart;
2644 
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2650 {
2651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2652 
2653   PetscFunctionBegin;
2654   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 /* -------------------------------------------------------------------*/
2659 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2660                                        MatGetRow_MPIAIJ,
2661                                        MatRestoreRow_MPIAIJ,
2662                                        MatMult_MPIAIJ,
2663                                 /* 4*/ MatMultAdd_MPIAIJ,
2664                                        MatMultTranspose_MPIAIJ,
2665                                        MatMultTransposeAdd_MPIAIJ,
2666                                        NULL,
2667                                        NULL,
2668                                        NULL,
2669                                 /*10*/ NULL,
2670                                        NULL,
2671                                        NULL,
2672                                        MatSOR_MPIAIJ,
2673                                        MatTranspose_MPIAIJ,
2674                                 /*15*/ MatGetInfo_MPIAIJ,
2675                                        MatEqual_MPIAIJ,
2676                                        MatGetDiagonal_MPIAIJ,
2677                                        MatDiagonalScale_MPIAIJ,
2678                                        MatNorm_MPIAIJ,
2679                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2680                                        MatAssemblyEnd_MPIAIJ,
2681                                        MatSetOption_MPIAIJ,
2682                                        MatZeroEntries_MPIAIJ,
2683                                 /*24*/ MatZeroRows_MPIAIJ,
2684                                        NULL,
2685                                        NULL,
2686                                        NULL,
2687                                        NULL,
2688                                 /*29*/ MatSetUp_MPIAIJ,
2689                                        NULL,
2690                                        NULL,
2691                                        MatGetDiagonalBlock_MPIAIJ,
2692                                        NULL,
2693                                 /*34*/ MatDuplicate_MPIAIJ,
2694                                        NULL,
2695                                        NULL,
2696                                        NULL,
2697                                        NULL,
2698                                 /*39*/ MatAXPY_MPIAIJ,
2699                                        MatCreateSubMatrices_MPIAIJ,
2700                                        MatIncreaseOverlap_MPIAIJ,
2701                                        MatGetValues_MPIAIJ,
2702                                        MatCopy_MPIAIJ,
2703                                 /*44*/ MatGetRowMax_MPIAIJ,
2704                                        MatScale_MPIAIJ,
2705                                        MatShift_MPIAIJ,
2706                                        MatDiagonalSet_MPIAIJ,
2707                                        MatZeroRowsColumns_MPIAIJ,
2708                                 /*49*/ MatSetRandom_MPIAIJ,
2709                                        MatGetRowIJ_MPIAIJ,
2710                                        MatRestoreRowIJ_MPIAIJ,
2711                                        NULL,
2712                                        NULL,
2713                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2714                                        NULL,
2715                                        MatSetUnfactored_MPIAIJ,
2716                                        MatPermute_MPIAIJ,
2717                                        NULL,
2718                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2719                                        MatDestroy_MPIAIJ,
2720                                        MatView_MPIAIJ,
2721                                        NULL,
2722                                        NULL,
2723                                 /*64*/ NULL,
2724                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2729                                        MatGetRowMinAbs_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                 /*75*/ MatFDColoringApply_AIJ,
2735                                        MatSetFromOptions_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                        MatFindZeroDiagonals_MPIAIJ,
2739                                 /*80*/ NULL,
2740                                        NULL,
2741                                        NULL,
2742                                 /*83*/ MatLoad_MPIAIJ,
2743                                        MatIsSymmetric_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        NULL,
2748                                 /*89*/ NULL,
2749                                        NULL,
2750                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2751                                        NULL,
2752                                        NULL,
2753                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        MatBindToCPU_MPIAIJ,
2758                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        MatConjugate_MPIAIJ,
2762                                        NULL,
2763                                 /*104*/MatSetValuesRow_MPIAIJ,
2764                                        MatRealPart_MPIAIJ,
2765                                        MatImaginaryPart_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                 /*109*/NULL,
2769                                        NULL,
2770                                        MatGetRowMin_MPIAIJ,
2771                                        NULL,
2772                                        MatMissingDiagonal_MPIAIJ,
2773                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2774                                        NULL,
2775                                        MatGetGhosts_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        MatGetMultiProcBlock_MPIAIJ,
2783                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2784                                        MatGetColumnReductions_MPIAIJ,
2785                                        MatInvertBlockDiagonal_MPIAIJ,
2786                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2787                                        MatCreateSubMatricesMPI_MPIAIJ,
2788                                 /*129*/NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2792                                        NULL,
2793                                 /*134*/NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                 /*139*/MatSetBlockSizes_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        MatFDColoringSetUp_MPIXAIJ,
2802                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2803                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2804                                 /*145*/NULL,
2805                                        NULL,
2806                                        NULL
2807 };
2808 
2809 /* ----------------------------------------------------------------------------------------*/
2810 
2811 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2812 {
2813   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2814 
2815   PetscFunctionBegin;
2816   PetscCall(MatStoreValues(aij->A));
2817   PetscCall(MatStoreValues(aij->B));
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2822 {
2823   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2824 
2825   PetscFunctionBegin;
2826   PetscCall(MatRetrieveValues(aij->A));
2827   PetscCall(MatRetrieveValues(aij->B));
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2832 {
2833   Mat_MPIAIJ     *b;
2834   PetscMPIInt    size;
2835 
2836   PetscFunctionBegin;
2837   PetscCall(PetscLayoutSetUp(B->rmap));
2838   PetscCall(PetscLayoutSetUp(B->cmap));
2839   b = (Mat_MPIAIJ*)B->data;
2840 
2841 #if defined(PETSC_USE_CTABLE)
2842   PetscCall(PetscTableDestroy(&b->colmap));
2843 #else
2844   PetscCall(PetscFree(b->colmap));
2845 #endif
2846   PetscCall(PetscFree(b->garray));
2847   PetscCall(VecDestroy(&b->lvec));
2848   PetscCall(VecScatterDestroy(&b->Mvctx));
2849 
2850   /* Because b->B may have been resized we simply destroy it and create a new one each time */
2851   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2852   PetscCall(MatDestroy(&b->B));
2853   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2854   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2855   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
2856   PetscCall(MatSetType(b->B,MATSEQAIJ));
2857   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2858 
2859   if (!B->preallocated) {
2860     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2861     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2862     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
2863     PetscCall(MatSetType(b->A,MATSEQAIJ));
2864     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2865   }
2866 
2867   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
2868   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2869   B->preallocated  = PETSC_TRUE;
2870   B->was_assembled = PETSC_FALSE;
2871   B->assembled     = PETSC_FALSE;
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2876 {
2877   Mat_MPIAIJ     *b;
2878 
2879   PetscFunctionBegin;
2880   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2881   PetscCall(PetscLayoutSetUp(B->rmap));
2882   PetscCall(PetscLayoutSetUp(B->cmap));
2883   b = (Mat_MPIAIJ*)B->data;
2884 
2885 #if defined(PETSC_USE_CTABLE)
2886   PetscCall(PetscTableDestroy(&b->colmap));
2887 #else
2888   PetscCall(PetscFree(b->colmap));
2889 #endif
2890   PetscCall(PetscFree(b->garray));
2891   PetscCall(VecDestroy(&b->lvec));
2892   PetscCall(VecScatterDestroy(&b->Mvctx));
2893 
2894   PetscCall(MatResetPreallocation(b->A));
2895   PetscCall(MatResetPreallocation(b->B));
2896   B->preallocated  = PETSC_TRUE;
2897   B->was_assembled = PETSC_FALSE;
2898   B->assembled = PETSC_FALSE;
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2903 {
2904   Mat            mat;
2905   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2906 
2907   PetscFunctionBegin;
2908   *newmat = NULL;
2909   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2910   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2911   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
2912   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2913   a       = (Mat_MPIAIJ*)mat->data;
2914 
2915   mat->factortype   = matin->factortype;
2916   mat->assembled    = matin->assembled;
2917   mat->insertmode   = NOT_SET_VALUES;
2918   mat->preallocated = matin->preallocated;
2919 
2920   a->size         = oldmat->size;
2921   a->rank         = oldmat->rank;
2922   a->donotstash   = oldmat->donotstash;
2923   a->roworiented  = oldmat->roworiented;
2924   a->rowindices   = NULL;
2925   a->rowvalues    = NULL;
2926   a->getrowactive = PETSC_FALSE;
2927 
2928   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2929   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2930 
2931   if (oldmat->colmap) {
2932 #if defined(PETSC_USE_CTABLE)
2933     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2934 #else
2935     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
2936     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
2937     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2938 #endif
2939   } else a->colmap = NULL;
2940   if (oldmat->garray) {
2941     PetscInt len;
2942     len  = oldmat->B->cmap->n;
2943     PetscCall(PetscMalloc1(len+1,&a->garray));
2944     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2945     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2946   } else a->garray = NULL;
2947 
2948   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2949      MatDuplicate() only requires the matrix to be preallocated.
2950      This can happen, for example, inside DMCreateMatrix_Shell() */
2951   if (oldmat->lvec) {
2952     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2953     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2954   }
2955   if (oldmat->Mvctx) {
2956     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2957     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2958   }
2959   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2960   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2961   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2962   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2963   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2964   *newmat = mat;
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2969 {
2970   PetscBool      isbinary, ishdf5;
2971 
2972   PetscFunctionBegin;
2973   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2974   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2975   /* force binary viewer to load .info file if it has not yet done so */
2976   PetscCall(PetscViewerSetUp(viewer));
2977   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2978   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2979   if (isbinary) {
2980     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2981   } else if (ishdf5) {
2982 #if defined(PETSC_HAVE_HDF5)
2983     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2984 #else
2985     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2986 #endif
2987   } else {
2988     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2989   }
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2994 {
2995   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2996   PetscInt       *rowidxs,*colidxs;
2997   PetscScalar    *matvals;
2998 
2999   PetscFunctionBegin;
3000   PetscCall(PetscViewerSetUp(viewer));
3001 
3002   /* read in matrix header */
3003   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3004   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3005   M  = header[1]; N = header[2]; nz = header[3];
3006   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3007   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3008   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3009 
3010   /* set block sizes from the viewer's .info file */
3011   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3012   /* set global sizes if not set already */
3013   if (mat->rmap->N < 0) mat->rmap->N = M;
3014   if (mat->cmap->N < 0) mat->cmap->N = N;
3015   PetscCall(PetscLayoutSetUp(mat->rmap));
3016   PetscCall(PetscLayoutSetUp(mat->cmap));
3017 
3018   /* check if the matrix sizes are correct */
3019   PetscCall(MatGetSize(mat,&rows,&cols));
3020   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3021 
3022   /* read in row lengths and build row indices */
3023   PetscCall(MatGetLocalSize(mat,&m,NULL));
3024   PetscCall(PetscMalloc1(m+1,&rowidxs));
3025   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3026   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3027   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3028   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3029   /* read in column indices and matrix values */
3030   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3031   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3032   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3033   /* store matrix indices and values */
3034   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3035   PetscCall(PetscFree(rowidxs));
3036   PetscCall(PetscFree2(colidxs,matvals));
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 /* Not scalable because of ISAllGather() unless getting all columns. */
3041 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3042 {
3043   IS             iscol_local;
3044   PetscBool      isstride;
3045   PetscMPIInt    lisstride=0,gisstride;
3046 
3047   PetscFunctionBegin;
3048   /* check if we are grabbing all columns */
3049   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3050 
3051   if (isstride) {
3052     PetscInt  start,len,mstart,mlen;
3053     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3054     PetscCall(ISGetLocalSize(iscol,&len));
3055     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3056     if (mstart == start && mlen-mstart == len) lisstride = 1;
3057   }
3058 
3059   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3060   if (gisstride) {
3061     PetscInt N;
3062     PetscCall(MatGetSize(mat,NULL,&N));
3063     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3064     PetscCall(ISSetIdentity(iscol_local));
3065     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3066   } else {
3067     PetscInt cbs;
3068     PetscCall(ISGetBlockSize(iscol,&cbs));
3069     PetscCall(ISAllGather(iscol,&iscol_local));
3070     PetscCall(ISSetBlockSize(iscol_local,cbs));
3071   }
3072 
3073   *isseq = iscol_local;
3074   PetscFunctionReturn(0);
3075 }
3076 
3077 /*
3078  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3079  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3080 
3081  Input Parameters:
3082    mat - matrix
3083    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3084            i.e., mat->rstart <= isrow[i] < mat->rend
3085    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3086            i.e., mat->cstart <= iscol[i] < mat->cend
3087  Output Parameters:
3088    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3089    iscol_o - sequential column index set for retrieving mat->B
3090    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3091  */
3092 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3093 {
3094   Vec            x,cmap;
3095   const PetscInt *is_idx;
3096   PetscScalar    *xarray,*cmaparray;
3097   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3098   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3099   Mat            B=a->B;
3100   Vec            lvec=a->lvec,lcmap;
3101   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3102   MPI_Comm       comm;
3103   VecScatter     Mvctx=a->Mvctx;
3104 
3105   PetscFunctionBegin;
3106   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3107   PetscCall(ISGetLocalSize(iscol,&ncols));
3108 
3109   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3110   PetscCall(MatCreateVecs(mat,&x,NULL));
3111   PetscCall(VecSet(x,-1.0));
3112   PetscCall(VecDuplicate(x,&cmap));
3113   PetscCall(VecSet(cmap,-1.0));
3114 
3115   /* Get start indices */
3116   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3117   isstart -= ncols;
3118   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3119 
3120   PetscCall(ISGetIndices(iscol,&is_idx));
3121   PetscCall(VecGetArray(x,&xarray));
3122   PetscCall(VecGetArray(cmap,&cmaparray));
3123   PetscCall(PetscMalloc1(ncols,&idx));
3124   for (i=0; i<ncols; i++) {
3125     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3126     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3127     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3128   }
3129   PetscCall(VecRestoreArray(x,&xarray));
3130   PetscCall(VecRestoreArray(cmap,&cmaparray));
3131   PetscCall(ISRestoreIndices(iscol,&is_idx));
3132 
3133   /* Get iscol_d */
3134   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
3135   PetscCall(ISGetBlockSize(iscol,&i));
3136   PetscCall(ISSetBlockSize(*iscol_d,i));
3137 
3138   /* Get isrow_d */
3139   PetscCall(ISGetLocalSize(isrow,&m));
3140   rstart = mat->rmap->rstart;
3141   PetscCall(PetscMalloc1(m,&idx));
3142   PetscCall(ISGetIndices(isrow,&is_idx));
3143   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3144   PetscCall(ISRestoreIndices(isrow,&is_idx));
3145 
3146   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
3147   PetscCall(ISGetBlockSize(isrow,&i));
3148   PetscCall(ISSetBlockSize(*isrow_d,i));
3149 
3150   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3151   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3152   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3153 
3154   PetscCall(VecDuplicate(lvec,&lcmap));
3155 
3156   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3157   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3158 
3159   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3160   /* off-process column indices */
3161   count = 0;
3162   PetscCall(PetscMalloc1(Bn,&idx));
3163   PetscCall(PetscMalloc1(Bn,&cmap1));
3164 
3165   PetscCall(VecGetArray(lvec,&xarray));
3166   PetscCall(VecGetArray(lcmap,&cmaparray));
3167   for (i=0; i<Bn; i++) {
3168     if (PetscRealPart(xarray[i]) > -1.0) {
3169       idx[count]     = i;                   /* local column index in off-diagonal part B */
3170       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3171       count++;
3172     }
3173   }
3174   PetscCall(VecRestoreArray(lvec,&xarray));
3175   PetscCall(VecRestoreArray(lcmap,&cmaparray));
3176 
3177   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3178   /* cannot ensure iscol_o has same blocksize as iscol! */
3179 
3180   PetscCall(PetscFree(idx));
3181   *garray = cmap1;
3182 
3183   PetscCall(VecDestroy(&x));
3184   PetscCall(VecDestroy(&cmap));
3185   PetscCall(VecDestroy(&lcmap));
3186   PetscFunctionReturn(0);
3187 }
3188 
3189 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3190 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3191 {
3192   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3193   Mat            M = NULL;
3194   MPI_Comm       comm;
3195   IS             iscol_d,isrow_d,iscol_o;
3196   Mat            Asub = NULL,Bsub = NULL;
3197   PetscInt       n;
3198 
3199   PetscFunctionBegin;
3200   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3201 
3202   if (call == MAT_REUSE_MATRIX) {
3203     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3204     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
3205     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3206 
3207     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
3208     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3209 
3210     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
3211     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3212 
3213     /* Update diagonal and off-diagonal portions of submat */
3214     asub = (Mat_MPIAIJ*)(*submat)->data;
3215     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
3216     PetscCall(ISGetLocalSize(iscol_o,&n));
3217     if (n) {
3218       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
3219     }
3220     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
3221     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
3222 
3223   } else { /* call == MAT_INITIAL_MATRIX */
3224     const PetscInt *garray;
3225     PetscInt        BsubN;
3226 
3227     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3228     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
3229 
3230     /* Create local submatrices Asub and Bsub */
3231     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
3232     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
3233 
3234     /* Create submatrix M */
3235     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
3236 
3237     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3238     asub = (Mat_MPIAIJ*)M->data;
3239 
3240     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3241     n = asub->B->cmap->N;
3242     if (BsubN > n) {
3243       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3244       const PetscInt *idx;
3245       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3246       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
3247 
3248       PetscCall(PetscMalloc1(n,&idx_new));
3249       j = 0;
3250       PetscCall(ISGetIndices(iscol_o,&idx));
3251       for (i=0; i<n; i++) {
3252         if (j >= BsubN) break;
3253         while (subgarray[i] > garray[j]) j++;
3254 
3255         if (subgarray[i] == garray[j]) {
3256           idx_new[i] = idx[j++];
3257         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3258       }
3259       PetscCall(ISRestoreIndices(iscol_o,&idx));
3260 
3261       PetscCall(ISDestroy(&iscol_o));
3262       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
3263 
3264     } else if (BsubN < n) {
3265       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3266     }
3267 
3268     PetscCall(PetscFree(garray));
3269     *submat = M;
3270 
3271     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3272     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
3273     PetscCall(ISDestroy(&isrow_d));
3274 
3275     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
3276     PetscCall(ISDestroy(&iscol_d));
3277 
3278     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
3279     PetscCall(ISDestroy(&iscol_o));
3280   }
3281   PetscFunctionReturn(0);
3282 }
3283 
3284 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3285 {
3286   IS             iscol_local=NULL,isrow_d;
3287   PetscInt       csize;
3288   PetscInt       n,i,j,start,end;
3289   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3290   MPI_Comm       comm;
3291 
3292   PetscFunctionBegin;
3293   /* If isrow has same processor distribution as mat,
3294      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3295   if (call == MAT_REUSE_MATRIX) {
3296     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3297     if (isrow_d) {
3298       sameRowDist  = PETSC_TRUE;
3299       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3300     } else {
3301       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3302       if (iscol_local) {
3303         sameRowDist  = PETSC_TRUE;
3304         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3305       }
3306     }
3307   } else {
3308     /* Check if isrow has same processor distribution as mat */
3309     sameDist[0] = PETSC_FALSE;
3310     PetscCall(ISGetLocalSize(isrow,&n));
3311     if (!n) {
3312       sameDist[0] = PETSC_TRUE;
3313     } else {
3314       PetscCall(ISGetMinMax(isrow,&i,&j));
3315       PetscCall(MatGetOwnershipRange(mat,&start,&end));
3316       if (i >= start && j < end) {
3317         sameDist[0] = PETSC_TRUE;
3318       }
3319     }
3320 
3321     /* Check if iscol has same processor distribution as mat */
3322     sameDist[1] = PETSC_FALSE;
3323     PetscCall(ISGetLocalSize(iscol,&n));
3324     if (!n) {
3325       sameDist[1] = PETSC_TRUE;
3326     } else {
3327       PetscCall(ISGetMinMax(iscol,&i,&j));
3328       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
3329       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3330     }
3331 
3332     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3333     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
3334     sameRowDist = tsameDist[0];
3335   }
3336 
3337   if (sameRowDist) {
3338     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3339       /* isrow and iscol have same processor distribution as mat */
3340       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
3341       PetscFunctionReturn(0);
3342     } else { /* sameRowDist */
3343       /* isrow has same processor distribution as mat */
3344       if (call == MAT_INITIAL_MATRIX) {
3345         PetscBool sorted;
3346         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3347         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
3348         PetscCall(ISGetSize(iscol,&i));
3349         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3350 
3351         PetscCall(ISSorted(iscol_local,&sorted));
3352         if (sorted) {
3353           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3354           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
3355           PetscFunctionReturn(0);
3356         }
3357       } else { /* call == MAT_REUSE_MATRIX */
3358         IS iscol_sub;
3359         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3360         if (iscol_sub) {
3361           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
3362           PetscFunctionReturn(0);
3363         }
3364       }
3365     }
3366   }
3367 
3368   /* General case: iscol -> iscol_local which has global size of iscol */
3369   if (call == MAT_REUSE_MATRIX) {
3370     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
3371     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3372   } else {
3373     if (!iscol_local) {
3374       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3375     }
3376   }
3377 
3378   PetscCall(ISGetLocalSize(iscol,&csize));
3379   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
3380 
3381   if (call == MAT_INITIAL_MATRIX) {
3382     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
3383     PetscCall(ISDestroy(&iscol_local));
3384   }
3385   PetscFunctionReturn(0);
3386 }
3387 
3388 /*@C
3389      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3390          and "off-diagonal" parts of the matrix in CSR format.
3391 
3392    Collective
3393 
3394    Input Parameters:
3395 +  comm - MPI communicator
3396 .  A - "diagonal" portion of matrix
3397 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3398 -  garray - global index of B columns
3399 
3400    Output Parameter:
3401 .   mat - the matrix, with input A as its local diagonal matrix
3402    Level: advanced
3403 
3404    Notes:
3405        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3406        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
3407 
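   Example usage (a hypothetical sketch of the calling sequence only; Adiag, Boffdiag and garray stand for the sequential AIJ blocks and column map already built by the caller, and error checking is omitted):
$      MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boffdiag,garray,&C);
   after the call Adiag is owned by C and Boffdiag has been destroyed, so only C may be used from this point on.
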
3408 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3409 @*/
3410 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3411 {
3412   Mat_MPIAIJ        *maij;
3413   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3414   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3415   const PetscScalar *oa;
3416   Mat               Bnew;
3417   PetscInt          m,n,N;
3418 
3419   PetscFunctionBegin;
3420   PetscCall(MatCreate(comm,mat));
3421   PetscCall(MatGetSize(A,&m,&n));
3422   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3423   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3424   /* The check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3425   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3426 
3427   /* Get global columns of mat */
3428   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3429 
3430   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
3431   PetscCall(MatSetType(*mat,MATMPIAIJ));
3432   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3433   maij = (Mat_MPIAIJ*)(*mat)->data;
3434 
3435   (*mat)->preallocated = PETSC_TRUE;
3436 
3437   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3438   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3439 
3440   /* Set A as diagonal portion of *mat */
3441   maij->A = A;
3442 
3443   nz = oi[m];
3444   for (i=0; i<nz; i++) {
3445     col   = oj[i];
3446     oj[i] = garray[col];
3447   }
3448 
3449   /* Set Bnew as off-diagonal portion of *mat */
3450   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
3451   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
3452   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3453   bnew        = (Mat_SeqAIJ*)Bnew->data;
3454   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3455   maij->B     = Bnew;
3456 
3457   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3458 
3459   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3460   b->free_a       = PETSC_FALSE;
3461   b->free_ij      = PETSC_FALSE;
3462   PetscCall(MatDestroy(&B));
3463 
3464   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3465   bnew->free_a       = PETSC_TRUE;
3466   bnew->free_ij      = PETSC_TRUE;
3467 
3468   /* condense columns of maij->B */
3469   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3470   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3471   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3472   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
3473   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3478 
3479 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3480 {
3481   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3482   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3483   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3484   Mat            M,Msub,B=a->B;
3485   MatScalar      *aa;
3486   Mat_SeqAIJ     *aij;
3487   PetscInt       *garray = a->garray,*colsub,Ncols;
3488   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3489   IS             iscol_sub,iscmap;
3490   const PetscInt *is_idx,*cmap;
3491   PetscBool      allcolumns=PETSC_FALSE;
3492   MPI_Comm       comm;
3493 
3494   PetscFunctionBegin;
3495   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3496   if (call == MAT_REUSE_MATRIX) {
3497     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3498     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3499     PetscCall(ISGetLocalSize(iscol_sub,&count));
3500 
3501     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3502     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3503 
3504     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3505     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3506 
3507     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3508 
3509   } else { /* call == MAT_INITIAL_MATRIX */
3510     PetscBool flg;
3511 
3512     PetscCall(ISGetLocalSize(iscol,&n));
3513     PetscCall(ISGetSize(iscol,&Ncols));
3514 
3515     /* (1) iscol -> nonscalable iscol_local */
3516     /* Check for special case: each processor gets entire matrix columns */
3517     PetscCall(ISIdentity(iscol_local,&flg));
3518     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3519     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3520     if (allcolumns) {
3521       iscol_sub = iscol_local;
3522       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3523       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3524 
3525     } else {
3526       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3527       PetscInt *idx,*cmap1,k;
3528       PetscCall(PetscMalloc1(Ncols,&idx));
3529       PetscCall(PetscMalloc1(Ncols,&cmap1));
3530       PetscCall(ISGetIndices(iscol_local,&is_idx));
3531       count = 0;
3532       k     = 0;
3533       for (i=0; i<Ncols; i++) {
3534         j = is_idx[i];
3535         if (j >= cstart && j < cend) {
3536           /* diagonal part of mat */
3537           idx[count]     = j;
3538           cmap1[count++] = i; /* column index in submat */
3539         } else if (Bn) {
3540           /* off-diagonal part of mat */
3541           if (j == garray[k]) {
3542             idx[count]     = j;
3543             cmap1[count++] = i;  /* column index in submat */
3544           } else if (j > garray[k]) {
3545             while (j > garray[k] && k < Bn-1) k++;
3546             if (j == garray[k]) {
3547               idx[count]     = j;
3548               cmap1[count++] = i; /* column index in submat */
3549             }
3550           }
3551         }
3552       }
3553       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3554 
3555       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3556       PetscCall(ISGetBlockSize(iscol,&cbs));
3557       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3558 
3559       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3560     }
3561 
3562     /* (3) Create sequential Msub */
3563     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3564   }
3565 
3566   PetscCall(ISGetLocalSize(iscol_sub,&count));
3567   aij  = (Mat_SeqAIJ*)(Msub)->data;
3568   ii   = aij->i;
3569   PetscCall(ISGetIndices(iscmap,&cmap));
3570 
3571   /*
3572       m - number of local rows
3573       Ncols - number of columns (same on all processors)
3574       rstart - first row in new global matrix generated
3575   */
3576   PetscCall(MatGetSize(Msub,&m,NULL));
3577 
3578   if (call == MAT_INITIAL_MATRIX) {
3579     /* (4) Create parallel newmat */
3580     PetscMPIInt    rank,size;
3581     PetscInt       csize;
3582 
3583     PetscCallMPI(MPI_Comm_size(comm,&size));
3584     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3585 
3586     /*
3587         Determine the number of non-zeros in the diagonal and off-diagonal
3588         portions of the matrix in order to do correct preallocation
3589     */
3590 
3591     /* first get start and end of "diagonal" columns */
3592     PetscCall(ISGetLocalSize(iscol,&csize));
3593     if (csize == PETSC_DECIDE) {
3594       PetscCall(ISGetSize(isrow,&mglobal));
3595       if (mglobal == Ncols) { /* square matrix */
3596         nlocal = m;
3597       } else {
3598         nlocal = Ncols/size + ((Ncols % size) > rank);
3599       }
3600     } else {
3601       nlocal = csize;
3602     }
3603     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3604     rstart = rend - nlocal;
3605     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3606 
3607     /* next, compute all the lengths */
3608     jj    = aij->j;
3609     PetscCall(PetscMalloc1(2*m+1,&dlens));
3610     olens = dlens + m;
3611     for (i=0; i<m; i++) {
3612       jend = ii[i+1] - ii[i];
3613       olen = 0;
3614       dlen = 0;
3615       for (j=0; j<jend; j++) {
3616         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3617         else dlen++;
3618         jj++;
3619       }
3620       olens[i] = olen;
3621       dlens[i] = dlen;
3622     }
3623 
3624     PetscCall(ISGetBlockSize(isrow,&bs));
3625     PetscCall(ISGetBlockSize(iscol,&cbs));
3626 
3627     PetscCall(MatCreate(comm,&M));
3628     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3629     PetscCall(MatSetBlockSizes(M,bs,cbs));
3630     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3631     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3632     PetscCall(PetscFree(dlens));
3633 
3634   } else { /* call == MAT_REUSE_MATRIX */
3635     M    = *newmat;
3636     PetscCall(MatGetLocalSize(M,&i,NULL));
3637     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3638     PetscCall(MatZeroEntries(M));
3639     /*
3640          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3641        rather than the slower MatSetValues().
3642     */
3643     M->was_assembled = PETSC_TRUE;
3644     M->assembled     = PETSC_FALSE;
3645   }
3646 
3647   /* (5) Set values of Msub to *newmat */
3648   PetscCall(PetscMalloc1(count,&colsub));
3649   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3650 
3651   jj   = aij->j;
3652   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3653   for (i=0; i<m; i++) {
3654     row = rstart + i;
3655     nz  = ii[i+1] - ii[i];
3656     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3657     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3658     jj += nz; aa += nz;
3659   }
3660   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3661   PetscCall(ISRestoreIndices(iscmap,&cmap));
3662 
3663   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3664   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3665 
3666   PetscCall(PetscFree(colsub));
3667 
3668   /* save Msub, iscol_sub and iscmap used in processor for next request */
3669   if (call == MAT_INITIAL_MATRIX) {
3670     *newmat = M;
3671     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3672     PetscCall(MatDestroy(&Msub));
3673 
3674     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3675     PetscCall(ISDestroy(&iscol_sub));
3676 
3677     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3678     PetscCall(ISDestroy(&iscmap));
3679 
3680     if (iscol_local) {
3681       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3682       PetscCall(ISDestroy(&iscol_local));
3683     }
3684   }
3685   PetscFunctionReturn(0);
3686 }
3687 
3688 /*
3689     Not great since it makes two copies of the submatrix: first a SeqAIJ
3690   on each process, and then the final result by concatenating the local matrices.
3691   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3692 
3693   Note: This requires a sequential iscol with all indices.
3694 */
3695 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3696 {
3697   PetscMPIInt    rank,size;
3698   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3699   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3700   Mat            M,Mreuse;
3701   MatScalar      *aa,*vwork;
3702   MPI_Comm       comm;
3703   Mat_SeqAIJ     *aij;
3704   PetscBool      colflag,allcolumns=PETSC_FALSE;
3705 
3706   PetscFunctionBegin;
3707   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3708   PetscCallMPI(MPI_Comm_rank(comm,&rank));
3709   PetscCallMPI(MPI_Comm_size(comm,&size));
3710 
3711   /* Check for special case: each processor gets entire matrix columns */
3712   PetscCall(ISIdentity(iscol,&colflag));
3713   PetscCall(ISGetLocalSize(iscol,&n));
3714   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3715   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3716 
3717   if (call ==  MAT_REUSE_MATRIX) {
3718     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3719     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3720     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3721   } else {
3722     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3723   }
3724 
3725   /*
3726       m - number of local rows
3727       n - number of columns (same on all processors)
3728       rstart - first row in new global matrix generated
3729   */
3730   PetscCall(MatGetSize(Mreuse,&m,&n));
3731   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3732   if (call == MAT_INITIAL_MATRIX) {
3733     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3734     ii  = aij->i;
3735     jj  = aij->j;
3736 
3737     /*
3738         Determine the number of non-zeros in the diagonal and off-diagonal
3739         portions of the matrix in order to do correct preallocation
3740     */
3741 
3742     /* first get start and end of "diagonal" columns */
3743     if (csize == PETSC_DECIDE) {
3744       PetscCall(ISGetSize(isrow,&mglobal));
3745       if (mglobal == n) { /* square matrix */
3746         nlocal = m;
3747       } else {
3748         nlocal = n/size + ((n % size) > rank);
3749       }
3750     } else {
3751       nlocal = csize;
3752     }
3753     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3754     rstart = rend - nlocal;
3755     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3756 
3757     /* next, compute all the lengths */
3758     PetscCall(PetscMalloc1(2*m+1,&dlens));
3759     olens = dlens + m;
3760     for (i=0; i<m; i++) {
3761       jend = ii[i+1] - ii[i];
3762       olen = 0;
3763       dlen = 0;
3764       for (j=0; j<jend; j++) {
3765         if (*jj < rstart || *jj >= rend) olen++;
3766         else dlen++;
3767         jj++;
3768       }
3769       olens[i] = olen;
3770       dlens[i] = dlen;
3771     }
3772     PetscCall(MatCreate(comm,&M));
3773     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3774     PetscCall(MatSetBlockSizes(M,bs,cbs));
3775     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3776     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3777     PetscCall(PetscFree(dlens));
3778   } else {
3779     PetscInt ml,nl;
3780 
3781     M    = *newmat;
3782     PetscCall(MatGetLocalSize(M,&ml,&nl));
3783     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3784     PetscCall(MatZeroEntries(M));
3785     /*
3786          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3787        rather than the slower MatSetValues().
3788     */
3789     M->was_assembled = PETSC_TRUE;
3790     M->assembled     = PETSC_FALSE;
3791   }
3792   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3793   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3794   ii   = aij->i;
3795   jj   = aij->j;
3796 
3797   /* trigger copy to CPU if needed */
3798   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3799   for (i=0; i<m; i++) {
3800     row   = rstart + i;
3801     nz    = ii[i+1] - ii[i];
3802     cwork = jj; jj += nz;
3803     vwork = aa; aa += nz;
3804     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3805   }
3806   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3807 
3808   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3809   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3810   *newmat = M;
3811 
3812   /* save submatrix used in processor for next request */
3813   if (call ==  MAT_INITIAL_MATRIX) {
3814     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3815     PetscCall(MatDestroy(&Mreuse));
3816   }
3817   PetscFunctionReturn(0);
3818 }
3819 
3820 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3821 {
3822   PetscInt       m,cstart, cend,j,nnz,i,d;
3823   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3824   const PetscInt *JJ;
3825   PetscBool      nooffprocentries;
3826 
3827   PetscFunctionBegin;
3828   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %" PetscInt_FMT,Ii[0]);
3829 
3830   PetscCall(PetscLayoutSetUp(B->rmap));
3831   PetscCall(PetscLayoutSetUp(B->cmap));
3832   m      = B->rmap->n;
3833   cstart = B->cmap->rstart;
3834   cend   = B->cmap->rend;
3835   rstart = B->rmap->rstart;
3836 
3837   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3838 
3839   if (PetscDefined(USE_DEBUG)) {
3840     for (i=0; i<m; i++) {
3841       nnz = Ii[i+1]- Ii[i];
3842       JJ  = J + Ii[i];
3843       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3844       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3845       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3846     }
3847   }
3848 
3849   for (i=0; i<m; i++) {
3850     nnz     = Ii[i+1]- Ii[i];
3851     JJ      = J + Ii[i];
3852     nnz_max = PetscMax(nnz_max,nnz);
3853     d       = 0;
3854     for (j=0; j<nnz; j++) {
3855       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3856     }
3857     d_nnz[i] = d;
3858     o_nnz[i] = nnz - d;
3859   }
3860   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3861   PetscCall(PetscFree2(d_nnz,o_nnz));
3862 
3863   for (i=0; i<m; i++) {
3864     ii   = i + rstart;
3865     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3866   }
3867   nooffprocentries    = B->nooffprocentries;
3868   B->nooffprocentries = PETSC_TRUE;
3869   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3870   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3871   B->nooffprocentries = nooffprocentries;
3872 
3873   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3874   PetscFunctionReturn(0);
3875 }
3876 
3877 /*@
3878    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3879    (the default parallel PETSc format).
3880 
3881    Collective
3882 
3883    Input Parameters:
3884 +  B - the matrix
3885 .  i - the indices into j for the start of each local row (starts with zero)
3886 .  j - the column indices for each local row (starts with zero)
3887 -  v - optional values in the matrix
3888 
3889    Level: developer
3890 
3891    Notes:
3892        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3893      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3894      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3895 
3896        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3897 
3898        The format used for the sparse matrix input is equivalent to a
3899     row-major ordering, i.e. for the following matrix, the input data expected is
3900     as shown
3901 
3902 $        1 0 0
3903 $        2 0 3     P0
3904 $       -------
3905 $        4 5 6     P1
3906 $
3907 $     Process0 [P0]: rows_owned=[0,1]
3908 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3909 $        j =  {0,0,2}  [size = 3]
3910 $        v =  {1,2,3}  [size = 3]
3911 $
3912 $     Process1 [P1]: rows_owned=[2]
3913 $        i =  {0,3}    [size = nrow+1  = 1+1]
3914 $        j =  {0,1,2}  [size = 3]
3915 $        v =  {4,5,6}  [size = 3]
3916 
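     A minimal usage sketch for the two-process layout above (illustrative; it assumes B has already
     been created on PETSC_COMM_WORLD with MatSetSizes()/MatSetType(MATMPIAIJ) using the local sizes
     of Process0 [P0], which owns rows 0 and 1 of the 3x3 example):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     PetscCall(MatMPIAIJSetPreallocationCSR(B,i,j,v));
.ve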
3917 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3918           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3919 @*/
3920 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3921 {
3922   PetscFunctionBegin;
3923   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /*@C
3928    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3929    (the default parallel PETSc format).  For good matrix assembly performance
3930    the user should preallocate the matrix storage by setting the parameters
3931    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3932    performance can be increased by more than a factor of 50.
3933 
3934    Collective
3935 
3936    Input Parameters:
3937 +  B - the matrix
3938 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3939            (same value is used for all local rows)
3940 .  d_nnz - array containing the number of nonzeros in the various rows of the
3941            DIAGONAL portion of the local submatrix (possibly different for each row)
3942            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3943            The size of this array is equal to the number of local rows, i.e 'm'.
3944            For matrices that will be factored, you must leave room for (and set)
3945            the diagonal entry even if it is zero.
3946 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3947            submatrix (same value is used for all local rows).
3948 -  o_nnz - array containing the number of nonzeros in the various rows of the
3949            OFF-DIAGONAL portion of the local submatrix (possibly different for
3950            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3951            structure. The size of this array is equal to the number
3952            of local rows, i.e 'm'.
3953 
3954    If the *_nnz parameter is given then the *_nz parameter is ignored
3955 
3956    The AIJ format (also called the Yale sparse matrix format or
3957    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3958    storage.  The stored row and column indices begin with zero.
3959    See Users-Manual: ch_mat for details.
3960 
3961    The parallel matrix is partitioned such that the first m0 rows belong to
3962    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3963    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3964 
3965    The DIAGONAL portion of the local submatrix of a processor can be defined
3966    as the submatrix which is obtained by extracting the part corresponding to
3967    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3968    first row that belongs to the processor, r2 is the last row belonging to
3969    this processor, and c1-c2 is the range of indices of the local part of a
3970    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3971    common case of a square matrix, the row and column ranges are the same and
3972    the DIAGONAL part is also square. The remaining portion of the local
3973    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3974 
3975    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3976 
3977    You can call MatGetInfo() to get information on how effective the preallocation was;
3978    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3979    You can also run with the option -info and look for messages with the string
3980    malloc in them to see if additional memory allocation was needed.
3981 
3982    Example usage:
3983 
3984    Consider the following 8x8 matrix with 34 non-zero values, that is
3985    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3986    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3987    as follows:
3988 
3989 .vb
3990             1  2  0  |  0  3  0  |  0  4
3991     Proc0   0  5  6  |  7  0  0  |  8  0
3992             9  0 10  | 11  0  0  | 12  0
3993     -------------------------------------
3994            13  0 14  | 15 16 17  |  0  0
3995     Proc1   0 18  0  | 19 20 21  |  0  0
3996             0  0  0  | 22 23  0  | 24  0
3997     -------------------------------------
3998     Proc2  25 26 27  |  0  0 28  | 29  0
3999            30  0  0  | 31 32 33  |  0 34
4000 .ve
4001 
4002    This can be represented as a collection of submatrices as:
4003 
4004 .vb
4005       A B C
4006       D E F
4007       G H I
4008 .ve
4009 
4010    Where the submatrices A,B,C are owned by proc0, D,E,F are
4011    owned by proc1, G,H,I are owned by proc2.
4012 
4013    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4014    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4015    The 'M','N' parameters are 8,8, and have the same values on all procs.
4016 
4017    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4018    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4019    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4020    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4021    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4022    matrix, and [DF] as another SeqAIJ matrix.
4023 
4024    When d_nz, o_nz parameters are specified, d_nz storage elements are
4025    allocated for every row of the local diagonal submatrix, and o_nz
4026    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4027    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4028    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4029    In this case, the values of d_nz,o_nz are:
4030 .vb
4031      proc0 : dnz = 2, o_nz = 2
4032      proc1 : dnz = 3, o_nz = 2
4033      proc2 : dnz = 1, o_nz = 4
4034 .ve
4035    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4036    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4037    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4038    34 values.
4039 
4040    When d_nnz, o_nnz parameters are specified, the storage is specified
4041    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4042    In the above case the values for d_nnz,o_nnz are:
4043 .vb
4044      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4045      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4046      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4047 .ve
4048    Here the space allocated is the sum of all the above values, i.e. 34, and
4049    hence pre-allocation is perfect.
4050 
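   As a minimal sketch of the calling sequence (illustrative; the d_nnz/o_nnz values are those of
   proc0 in the example above, and each process passes its own local sizes and counts):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,3,3,8,8));
     PetscCall(MatSetType(A,MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz));
.ve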
4051    Level: intermediate
4052 
4053 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4054           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4055 @*/
4056 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4057 {
4058   PetscFunctionBegin;
4059   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4060   PetscValidType(B,1);
4061   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4062   PetscFunctionReturn(0);
4063 }
4064 
4065 /*@
4066      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4067          CSR format.
4068 
4069    Collective
4070 
4071    Input Parameters:
4072 +  comm - MPI communicator
4073 .  m - number of local rows (Cannot be PETSC_DECIDE)
4074 .  n - This value should be the same as the local size used in creating the
4075        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4076        calculated if N is given). For square matrices n is almost always m.
4077 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4078 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4079 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4080 .   j - column indices
4081 -   a - matrix values
4082 
4083    Output Parameter:
4084 .   mat - the matrix
4085 
4086    Level: intermediate
4087 
4088    Notes:
4089        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4090      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4091      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4092 
4093        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4094 
4095        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4096 
4097        The format used for the sparse matrix input is equivalent to a
4098     row-major ordering, i.e. for the following matrix, the input data expected is
4099     as shown
4100 
4101 $        1 0 0
4102 $        2 0 3     P0
4103 $       -------
4104 $        4 5 6     P1
4105 $
4106 $     Process0 [P0]: rows_owned=[0,1]
4107 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4108 $        j =  {0,0,2}  [size = 3]
4109 $        v =  {1,2,3}  [size = 3]
4110 $
4111 $     Process1 [P1]: rows_owned=[2]
4112 $        i =  {0,3}    [size = nrow+1  = 1+1]
4113 $        j =  {0,1,2}  [size = 3]
4114 $        v =  {4,5,6}  [size = 3]
4115 
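     A minimal sketch of the corresponding call (illustrative; shown for Process0 [P0] of the example
     above with local sizes m=2, n=2, while Process1 [P1] passes its own arrays and m=1, n=1):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,2,3,3,i,j,v,&A));
.ve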
4116 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4117           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4118 @*/
4119 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4120 {
4121   PetscFunctionBegin;
4122   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4123   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4124   PetscCall(MatCreate(comm,mat));
4125   PetscCall(MatSetSizes(*mat,m,n,M,N));
4126   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4127   PetscCall(MatSetType(*mat,MATMPIAIJ));
4128   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4129   PetscFunctionReturn(0);
4130 }
4131 
4132 /*@
4133      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4134          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4135 
4136    Collective
4137 
4138    Input Parameters:
4139 +  mat - the matrix
4140 .  m - number of local rows (Cannot be PETSC_DECIDE)
4141 .  n - This value should be the same as the local size used in creating the
4142        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4143        calculated if N is given). For square matrices n is almost always m.
4144 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4145 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4146 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4147 .  J - column indices
4148 -  v - matrix values
4149 
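   A minimal sketch (illustrative; it continues the Process0 [P0] example from MatCreateMPIAIJWithArrays(),
   so m, n, M, N and the Ii, J arrays are unchanged and only the numerical values differ):

.vb
     PetscInt    Ii[] = {0,1,3};
     PetscInt    J[]  = {0,0,2};
     PetscScalar v[]  = {10.0,20.0,30.0};
     PetscCall(MatUpdateMPIAIJWithArrays(A,2,2,3,3,Ii,J,v));
.ve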
4150    Level: intermediate
4151 
4152 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4153           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4154 @*/
4155 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4156 {
4157   PetscInt       cstart,nnz,i,j;
4158   PetscInt       *ld;
4159   PetscBool      nooffprocentries;
4160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4161   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4162   PetscScalar    *ad,*ao;
4163   const PetscInt *Adi = Ad->i;
4164   PetscInt       ldi,Iii,md;
4165 
4166   PetscFunctionBegin;
4167   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4168   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4169   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4170   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4171 
4172   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4173   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4174   cstart = mat->cmap->rstart;
4175   if (!Aij->ld) {
4176     /* count number of entries below block diagonal */
4177     PetscCall(PetscCalloc1(m,&ld));
4178     Aij->ld = ld;
4179     for (i=0; i<m; i++) {
4180       nnz  = Ii[i+1]- Ii[i];
4181       j     = 0;
4182       while  (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so J[j] is never read past the end of the row */
4183       J    += nnz;
4184       ld[i] = j;
4185     }
4186   } else {
4187     ld = Aij->ld;
4188   }
4189 
4190   for (i=0; i<m; i++) {
4191     nnz  = Ii[i+1]- Ii[i];
4192     Iii  = Ii[i];
4193     ldi  = ld[i];
4194     md   = Adi[i+1]-Adi[i];
4195     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4196     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4197     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4198     ad  += md;
4199     ao  += nnz - md;
4200   }
4201   nooffprocentries      = mat->nooffprocentries;
4202   mat->nooffprocentries = PETSC_TRUE;
4203   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4204   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4205   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4206   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4207   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4208   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4209   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4210   mat->nooffprocentries = nooffprocentries;
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 /*@C
4215    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4216    (the default parallel PETSc format).  For good matrix assembly performance
4217    the user should preallocate the matrix storage by setting the parameters
4218    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4219    performance can be increased by more than a factor of 50.
4220 
4221    Collective
4222 
4223    Input Parameters:
4224 +  comm - MPI communicator
4225 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4226            This value should be the same as the local size used in creating the
4227            y vector for the matrix-vector product y = Ax.
4228 .  n - This value should be the same as the local size used in creating the
4229        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4230        calculated if N is given). For square matrices n is almost always m.
4231 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4232 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4233 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4234            (same value is used for all local rows)
4235 .  d_nnz - array containing the number of nonzeros in the various rows of the
4236            DIAGONAL portion of the local submatrix (possibly different for each row)
4237            or NULL, if d_nz is used to specify the nonzero structure.
4238            The size of this array is equal to the number of local rows, i.e 'm'.
4239 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4240            submatrix (same value is used for all local rows).
4241 -  o_nnz - array containing the number of nonzeros in the various rows of the
4242            OFF-DIAGONAL portion of the local submatrix (possibly different for
4243            each row) or NULL, if o_nz is used to specify the nonzero
4244            structure. The size of this array is equal to the number
4245            of local rows, i.e 'm'.
4246 
4247    Output Parameter:
4248 .  A - the matrix
4249 
4250    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4251    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4252    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4253 
4254    Notes:
4255    If the *_nnz parameter is given then the *_nz parameter is ignored
4256 
4257    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4258    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4259    storage requirements for this matrix.
4260 
4261    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4262    processor then it must be used on all processors that share the object for
4263    that argument.
4264 
4265    The user MUST specify either the local or global matrix dimensions
4266    (possibly both).
4267 
4268    The parallel matrix is partitioned across processors such that the
4269    first m0 rows belong to process 0, the next m1 rows belong to
4270    process 1, the next m2 rows belong to process 2 etc.. where
4271    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4272    values corresponding to an [m x N] submatrix.
4273 
4274    The columns are logically partitioned with the first n0 columns belonging
4275    to the 0th partition, the next n1 columns belonging to the next
4276    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4277 
4278    The DIAGONAL portion of the local submatrix on any given processor
4279    is the submatrix corresponding to the rows m and columns n owned by
4280    that processor, i.e. the diagonal matrix on
4281    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4282    etc. The remaining portion of the local submatrix [m x (N-n)]
4283    constitutes the OFF-DIAGONAL portion. The example below better
4284    illustrates this concept.
4285 
4286    For a square global matrix we define each processor's diagonal portion
4287    to be its local rows and the corresponding columns (a square submatrix);
4288    each processor's off-diagonal portion encompasses the remainder of the
4289    local matrix (a rectangular submatrix).
4290 
4291    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4292 
4293    When calling this routine with a single process communicator, a matrix of
4294    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4295    type of communicator, use the construction mechanism
4296 .vb
4297      MatCreate(...,&A);
4298      MatSetType(A,MATMPIAIJ);
4299      MatSetSizes(A, m,n,M,N);
4300      MatMPIAIJSetPreallocation(A,...);
4301 .ve
4304 
4305    By default, this format uses inodes (identical nodes) when possible.
4306    We search for consecutive rows with the same nonzero structure, thereby
4307    reusing matrix information to achieve increased efficiency.
4308 
4309    Options Database Keys:
4310 +  -mat_no_inode  - Do not use inodes
4311 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4312 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4313         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4314         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4315 
4316    Example usage:
4317 
4318    Consider the following 8x8 matrix with 34 non-zero values, that is
4319    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4320    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4321    as follows
4322 
4323 .vb
4324             1  2  0  |  0  3  0  |  0  4
4325     Proc0   0  5  6  |  7  0  0  |  8  0
4326             9  0 10  | 11  0  0  | 12  0
4327     -------------------------------------
4328            13  0 14  | 15 16 17  |  0  0
4329     Proc1   0 18  0  | 19 20 21  |  0  0
4330             0  0  0  | 22 23  0  | 24  0
4331     -------------------------------------
4332     Proc2  25 26 27  |  0  0 28  | 29  0
4333            30  0  0  | 31 32 33  |  0 34
4334 .ve
4335 
4336    This can be represented as a collection of submatrices as
4337 
4338 .vb
4339       A B C
4340       D E F
4341       G H I
4342 .ve
4343 
4344    Where the submatrices A,B,C are owned by proc0, D,E,F are
4345    owned by proc1, G,H,I are owned by proc2.
4346 
4347    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4348    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4349    The 'M','N' parameters are 8,8, and have the same values on all procs.
4350 
4351    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4352    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4353    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4354    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4355    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4356    matrix, and [DF] as another SeqAIJ matrix.
4357 
4358    When d_nz, o_nz parameters are specified, d_nz storage elements are
4359    allocated for every row of the local diagonal submatrix, and o_nz
4360    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4361    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4362    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4363    In this case, the values of d_nz,o_nz are
4364 .vb
4365      proc0 : dnz = 2, o_nz = 2
4366      proc1 : dnz = 3, o_nz = 2
4367      proc2 : dnz = 1, o_nz = 4
4368 .ve
4369    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4370    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4371    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4372    34 values.
4373 
4374    When d_nnz, o_nnz parameters are specified, the storage is specified
4375    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4376    In the above case the values for d_nnz,o_nnz are
4377 .vb
4378      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4379      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4380      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4381 .ve
4382    Here the space allocated is the sum of all the above values, i.e. 34, and
4383    hence pre-allocation is perfect.
4384 
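   As a minimal sketch of the call itself (illustrative; these are the proc0 values from the example
   above, and each process passes its own local sizes and nonzero counts):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     PetscCall(MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A));
.ve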
4385    Level: intermediate
4386 
4387 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4388           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4389 @*/
4390 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4391 {
4392   PetscMPIInt    size;
4393 
4394   PetscFunctionBegin;
4395   PetscCall(MatCreate(comm,A));
4396   PetscCall(MatSetSizes(*A,m,n,M,N));
4397   PetscCallMPI(MPI_Comm_size(comm,&size));
4398   if (size > 1) {
4399     PetscCall(MatSetType(*A,MATMPIAIJ));
4400     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4401   } else {
4402     PetscCall(MatSetType(*A,MATSEQAIJ));
4403     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4404   }
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 /*@C
4409   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4410 
4411   Not collective
4412 
4413   Input Parameter:
4414 . A - The MPIAIJ matrix
4415 
4416   Output Parameters:
4417 + Ad - The local diagonal block as a SeqAIJ matrix
4418 . Ao - The local off-diagonal block as a SeqAIJ matrix
4419 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4420 
4421   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4422   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4423   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4424   local column numbers to global column numbers in the original matrix.
4425 
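  A minimal sketch (illustrative; Ad and Ao are internal matrices of A and must not be destroyed by
  the caller, and colmap[c] gives the global column number of local column c of Ao):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap));
.ve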
4426   Level: intermediate
4427 
4428 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4429 @*/
4430 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4431 {
4432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4433   PetscBool      flg;
4434 
4435   PetscFunctionBegin;
4436   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4437   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4438   if (Ad)     *Ad     = a->A;
4439   if (Ao)     *Ao     = a->B;
4440   if (colmap) *colmap = a->garray;
4441   PetscFunctionReturn(0);
4442 }
4443 
4444 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4445 {
4446   PetscInt       m,N,i,rstart,nnz,Ii;
4447   PetscInt       *indx;
4448   PetscScalar    *values;
4449   MatType        rootType;
4450 
4451   PetscFunctionBegin;
4452   PetscCall(MatGetSize(inmat,&m,&N));
4453   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4454     PetscInt       *dnz,*onz,sum,bs,cbs;
4455 
4456     if (n == PETSC_DECIDE) {
4457       PetscCall(PetscSplitOwnership(comm,&n,&N));
4458     }
4459     /* Check sum(n) = N */
4460     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4461     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4462 
4463     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4464     rstart -= m;
4465 
4466     MatPreallocateBegin(comm,m,n,dnz,onz);
4467     for (i=0; i<m; i++) {
4468       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4469       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4470       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4471     }
4472 
4473     PetscCall(MatCreate(comm,outmat));
4474     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4475     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4476     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4477     PetscCall(MatGetRootType_Private(inmat,&rootType));
4478     PetscCall(MatSetType(*outmat,rootType));
4479     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4480     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4481     MatPreallocateEnd(dnz,onz);
4482     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4483   }
4484 
4485   /* numeric phase */
4486   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4487   for (i=0; i<m; i++) {
4488     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4489     Ii   = i + rstart;
4490     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4491     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4492   }
4493   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4494   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4499 {
4500   PetscMPIInt       rank;
4501   PetscInt          m,N,i,rstart,nnz;
4502   size_t            len;
4503   const PetscInt    *indx;
4504   PetscViewer       out;
4505   char              *name;
4506   Mat               B;
4507   const PetscScalar *values;
4508 
4509   PetscFunctionBegin;
4510   PetscCall(MatGetLocalSize(A,&m,NULL));
4511   PetscCall(MatGetSize(A,NULL,&N));
4512   /* Should this be the type of the diagonal block of A? */
4513   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4514   PetscCall(MatSetSizes(B,m,N,m,N));
4515   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4516   PetscCall(MatSetType(B,MATSEQAIJ));
4517   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4518   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4519   for (i=0; i<m; i++) {
4520     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4521     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4522     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4523   }
4524   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4525   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4526 
4527   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4528   PetscCall(PetscStrlen(outfile,&len));
4529   PetscCall(PetscMalloc1(len+6,&name));
4530   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4531   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4532   PetscCall(PetscFree(name));
4533   PetscCall(MatView(B,out));
4534   PetscCall(PetscViewerDestroy(&out));
4535   PetscCall(MatDestroy(&B));
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4540 {
4541   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4542 
4543   PetscFunctionBegin;
4544   if (!merge) PetscFunctionReturn(0);
4545   PetscCall(PetscFree(merge->id_r));
4546   PetscCall(PetscFree(merge->len_s));
4547   PetscCall(PetscFree(merge->len_r));
4548   PetscCall(PetscFree(merge->bi));
4549   PetscCall(PetscFree(merge->bj));
4550   PetscCall(PetscFree(merge->buf_ri[0]));
4551   PetscCall(PetscFree(merge->buf_ri));
4552   PetscCall(PetscFree(merge->buf_rj[0]));
4553   PetscCall(PetscFree(merge->buf_rj));
4554   PetscCall(PetscFree(merge->coi));
4555   PetscCall(PetscFree(merge->coj));
4556   PetscCall(PetscFree(merge->owners_co));
4557   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4558   PetscCall(PetscFree(merge));
4559   PetscFunctionReturn(0);
4560 }
4561 
4562 #include <../src/mat/utils/freespace.h>
4563 #include <petscbt.h>
4564 
4565 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4566 {
4567   MPI_Comm            comm;
4568   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4569   PetscMPIInt         size,rank,taga,*len_s;
4570   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4571   PetscInt            proc,m;
4572   PetscInt            **buf_ri,**buf_rj;
4573   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4574   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4575   MPI_Request         *s_waits,*r_waits;
4576   MPI_Status          *status;
4577   const MatScalar     *aa,*a_a;
4578   MatScalar           **abuf_r,*ba_i;
4579   Mat_Merge_SeqsToMPI *merge;
4580   PetscContainer      container;
4581 
4582   PetscFunctionBegin;
4583   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
4584   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
4585 
4586   PetscCallMPI(MPI_Comm_size(comm,&size));
4587   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4588 
4589   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
4590   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4591   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
4592   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4593   aa   = a_a;
4594 
4595   bi     = merge->bi;
4596   bj     = merge->bj;
4597   buf_ri = merge->buf_ri;
4598   buf_rj = merge->buf_rj;
4599 
4600   PetscCall(PetscMalloc1(size,&status));
4601   owners = merge->rowmap->range;
4602   len_s  = merge->len_s;
4603 
4604   /* send and recv matrix values */
4605   /*-----------------------------*/
4606   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
4607   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
4608 
4609   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
4610   for (proc=0,k=0; proc<size; proc++) {
4611     if (!len_s[proc]) continue;
4612     i    = owners[proc];
4613     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
4614     k++;
4615   }
4616 
4617   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
4618   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
4619   PetscCall(PetscFree(status));
4620 
4621   PetscCall(PetscFree(s_waits));
4622   PetscCall(PetscFree(r_waits));
4623 
4624   /* insert mat values of mpimat */
4625   /*----------------------------*/
4626   PetscCall(PetscMalloc1(N,&ba_i));
4627   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4628 
4629   for (k=0; k<merge->nrecv; k++) {
4630     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4631     nrows       = *(buf_ri_k[k]);
4632     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4633     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4634   }
4635 
4636   /* set values of ba */
4637   m    = merge->rowmap->n;
4638   for (i=0; i<m; i++) {
4639     arow = owners[rank] + i;
4640     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4641     bnzi = bi[i+1] - bi[i];
4642     PetscCall(PetscArrayzero(ba_i,bnzi));
4643 
4644     /* add local non-zero vals of this proc's seqmat into ba */
4645     anzi   = ai[arow+1] - ai[arow];
4646     aj     = a->j + ai[arow];
4647     aa     = a_a + ai[arow];
4648     nextaj = 0;
4649     for (j=0; nextaj<anzi; j++) {
4650       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4651         ba_i[j] += aa[nextaj++];
4652       }
4653     }
4654 
4655     /* add received vals into ba */
4656     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4657       /* i-th row */
4658       if (i == *nextrow[k]) {
4659         anzi   = *(nextai[k]+1) - *nextai[k];
4660         aj     = buf_rj[k] + *(nextai[k]);
4661         aa     = abuf_r[k] + *(nextai[k]);
4662         nextaj = 0;
4663         for (j=0; nextaj<anzi; j++) {
4664           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4665             ba_i[j] += aa[nextaj++];
4666           }
4667         }
4668         nextrow[k]++; nextai[k]++;
4669       }
4670     }
4671     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
4672   }
4673   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
4674   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
4675   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
4676 
4677   PetscCall(PetscFree(abuf_r[0]));
4678   PetscCall(PetscFree(abuf_r));
4679   PetscCall(PetscFree(ba_i));
4680   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4681   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
4682   PetscFunctionReturn(0);
4683 }
4684 
4685 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4686 {
4687   Mat                 B_mpi;
4688   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4689   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4690   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4691   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4692   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4693   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4694   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4695   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4696   MPI_Status          *status;
4697   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4698   PetscBT             lnkbt;
4699   Mat_Merge_SeqsToMPI *merge;
4700   PetscContainer      container;
4701 
4702   PetscFunctionBegin;
4703   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
4704 
4705   /* make sure it is a PETSc comm */
4706   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
4707   PetscCallMPI(MPI_Comm_size(comm,&size));
4708   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4709 
4710   PetscCall(PetscNew(&merge));
4711   PetscCall(PetscMalloc1(size,&status));
4712 
4713   /* determine row ownership */
4714   /*---------------------------------------------------------*/
4715   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
4716   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
4717   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
4718   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
4719   PetscCall(PetscLayoutSetUp(merge->rowmap));
4720   PetscCall(PetscMalloc1(size,&len_si));
4721   PetscCall(PetscMalloc1(size,&merge->len_s));
4722 
4723   m      = merge->rowmap->n;
4724   owners = merge->rowmap->range;
4725 
4726   /* determine the number of messages to send, their lengths */
4727   /*---------------------------------------------------------*/
4728   len_s = merge->len_s;
4729 
4730   len          = 0; /* length of buf_si[] */
4731   merge->nsend = 0;
4732   for (proc=0; proc<size; proc++) {
4733     len_si[proc] = 0;
4734     if (proc == rank) {
4735       len_s[proc] = 0;
4736     } else {
4737       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4738       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4739     }
4740     if (len_s[proc]) {
4741       merge->nsend++;
4742       nrows = 0;
4743       for (i=owners[proc]; i<owners[proc+1]; i++) {
4744         if (ai[i+1] > ai[i]) nrows++;
4745       }
4746       len_si[proc] = 2*(nrows+1);
4747       len         += len_si[proc];
4748     }
4749   }
4750 
4751   /* determine the number and length of messages to receive for ij-structure */
4752   /*-------------------------------------------------------------------------*/
4753   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
4754   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4755 
4756   /* post the Irecv of j-structure */
4757   /*-------------------------------*/
4758   PetscCall(PetscCommGetNewTag(comm,&tagj));
4759   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
4760 
4761   /* post the Isend of j-structure */
4762   /*--------------------------------*/
4763   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
4764 
4765   for (proc=0, k=0; proc<size; proc++) {
4766     if (!len_s[proc]) continue;
4767     i    = owners[proc];
4768     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
4769     k++;
4770   }
4771 
4772   /* receives and sends of j-structure are complete */
4773   /*------------------------------------------------*/
4774   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
4775   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
4776 
4777   /* send and recv i-structure */
4778   /*---------------------------*/
4779   PetscCall(PetscCommGetNewTag(comm,&tagi));
4780   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
4781 
4782   PetscCall(PetscMalloc1(len+1,&buf_s));
4783   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4784   for (proc=0,k=0; proc<size; proc++) {
4785     if (!len_s[proc]) continue;
4786     /* form outgoing message for i-structure:
4787          buf_si[0]:                 nrows to be sent
4788                [1:nrows]:           row index (local to the receiving process)
4789                [nrows+1:2*nrows+1]: i-structure index
4790     */
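    /* Worked example of this layout (illustrative): if [proc] owns global rows 10..13 and, in this
       process's seqmat, only rows 10 (3 nonzeros) and 12 (2 nonzeros) are nonempty, then nrows = 2,
       len_si[proc] = 2*(nrows+1) = 6 and
           buf_si = {2, 0, 2, 0, 3, 5}
       i.e. the local row indices {0,2} followed by the running nonzero counts {0,3,5}. */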
4791     /*-------------------------------------------*/
4792     nrows       = len_si[proc]/2 - 1;
4793     buf_si_i    = buf_si + nrows+1;
4794     buf_si[0]   = nrows;
4795     buf_si_i[0] = 0;
4796     nrows       = 0;
4797     for (i=owners[proc]; i<owners[proc+1]; i++) {
4798       anzi = ai[i+1] - ai[i];
4799       if (anzi) {
4800         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4801         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4802         nrows++;
4803       }
4804     }
4805     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
4806     k++;
4807     buf_si += len_si[proc];
4808   }
4809 
4810   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
4811   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
4812 
4813   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
4814   for (i=0; i<merge->nrecv; i++) {
4815     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
4816   }
4817 
4818   PetscCall(PetscFree(len_si));
4819   PetscCall(PetscFree(len_ri));
4820   PetscCall(PetscFree(rj_waits));
4821   PetscCall(PetscFree2(si_waits,sj_waits));
4822   PetscCall(PetscFree(ri_waits));
4823   PetscCall(PetscFree(buf_s));
4824   PetscCall(PetscFree(status));
4825 
4826   /* compute a local seq matrix in each processor */
4827   /*----------------------------------------------*/
4828   /* allocate bi array and free space for accumulating nonzero column info */
4829   PetscCall(PetscMalloc1(m+1,&bi));
4830   bi[0] = 0;
4831 
4832   /* create and initialize a linked list */
4833   nlnk = N+1;
4834   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
4835 
4836   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4837   len  = ai[owners[rank+1]] - ai[owners[rank]];
4838   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
4839 
4840   current_space = free_space;
4841 
4842   /* determine symbolic info for each local row */
4843   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4844 
4845   for (k=0; k<merge->nrecv; k++) {
4846     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4847     nrows       = *buf_ri_k[k];
4848     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4849     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4850   }
4851 
4852   MatPreallocateBegin(comm,m,n,dnz,onz);
4853   len  = 0;
4854   for (i=0; i<m; i++) {
4855     bnzi = 0;
4856     /* add local non-zero cols of this proc's seqmat into lnk */
4857     arow  = owners[rank] + i;
4858     anzi  = ai[arow+1] - ai[arow];
4859     aj    = a->j + ai[arow];
4860     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4861     bnzi += nlnk;
4862     /* add received col data into lnk */
4863     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4864       if (i == *nextrow[k]) { /* i-th row */
4865         anzi  = *(nextai[k]+1) - *nextai[k];
4866         aj    = buf_rj[k] + *nextai[k];
4867         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4868         bnzi += nlnk;
4869         nextrow[k]++; nextai[k]++;
4870       }
4871     }
4872     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4873 
4874     /* if free space is not available, make more free space */
4875     if (current_space->local_remaining<bnzi) {
4876       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
4877       nspacedouble++;
4878     }
4879     /* copy data into free space, then initialize lnk */
4880     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
4881     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4882 
4883     current_space->array           += bnzi;
4884     current_space->local_used      += bnzi;
4885     current_space->local_remaining -= bnzi;
4886 
4887     bi[i+1] = bi[i] + bnzi;
4888   }
4889 
4890   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4891 
4892   PetscCall(PetscMalloc1(bi[m]+1,&bj));
4893   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
4894   PetscCall(PetscLLDestroy(lnk,lnkbt));
4895 
4896   /* create symbolic parallel matrix B_mpi */
4897   /*---------------------------------------*/
4898   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
4899   PetscCall(MatCreate(comm,&B_mpi));
4900   if (n==PETSC_DECIDE) {
4901     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
4902   } else {
4903     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4904   }
4905   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
4906   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
4907   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4908   MatPreallocateEnd(dnz,onz);
4909   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
4910 
4911   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4912   B_mpi->assembled  = PETSC_FALSE;
4913   merge->bi         = bi;
4914   merge->bj         = bj;
4915   merge->buf_ri     = buf_ri;
4916   merge->buf_rj     = buf_rj;
4917   merge->coi        = NULL;
4918   merge->coj        = NULL;
4919   merge->owners_co  = NULL;
4920 
4921   PetscCall(PetscCommDestroy(&comm));
4922 
4923   /* attach the supporting struct to B_mpi for reuse */
4924   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
4925   PetscCall(PetscContainerSetPointer(container,merge));
4926   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
4927   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
4928   PetscCall(PetscContainerDestroy(&container));
4929   *mpimat = B_mpi;
4930 
4931   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 /*@C
4936       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4937                  matrices from each processor
4938 
4939     Collective
4940 
4941    Input Parameters:
4942 +    comm - the communicator the parallel matrix will live on
4943 .    seqmat - the input sequential matrix on each process
4944 .    m - number of local rows (or PETSC_DECIDE)
4945 .    n - number of local columns (or PETSC_DECIDE)
4946 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 
4948    Output Parameter:
4949 .    mpimat - the parallel matrix generated
4950 
4951     Level: advanced
4952 
4953    Notes:
4954      The dimensions of the sequential matrix on each processor MUST be the same.
4955      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4956      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
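
     A minimal sketch (illustrative; it assumes every process has already assembled a sequential
     matrix seqmat with identical global dimensions, and that the nonzero pattern does not change
     between the two calls):

.vb
     Mat mpimat;
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat));
     PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat));
.ve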
4957 @*/
4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4959 {
4960   PetscMPIInt    size;
4961 
4962   PetscFunctionBegin;
4963   PetscCallMPI(MPI_Comm_size(comm,&size));
4964   if (size == 1) {
4965     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4966     if (scall == MAT_INITIAL_MATRIX) {
4967       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4968     } else {
4969       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4970     }
4971     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4972     PetscFunctionReturn(0);
4973   }
4974   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4975   if (scall == MAT_INITIAL_MATRIX) {
4976     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4977   }
4978   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4979   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4980   PetscFunctionReturn(0);
4981 }
4982 
4983 /*@
4984      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4985           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4986           with MatGetSize()
4987 
4988     Not Collective
4989 
4990    Input Parameter:
4991 .    A - the matrix
4993 
4994    Output Parameter:
4995 .    A_loc - the local sequential matrix generated
4996 
4997     Level: developer
4998 
4999    Notes:
5000      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5001 
5002      Destroy the matrix with MatDestroy()
5003 
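     A minimal sketch (illustrative; the sequential matrix A_loc would normally be used between the
     two calls):

.vb
     Mat A_loc;
     PetscCall(MatAIJGetLocalMat(A,&A_loc));
     PetscCall(MatDestroy(&A_loc));
.ve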
5004 .seealso: `MatMPIAIJGetLocalMat()`
5005 
5006 @*/
5007 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5008 {
5009   PetscBool      mpi;
5010 
5011   PetscFunctionBegin;
5012   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5013   if (mpi) {
5014     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5015   } else {
5016     *A_loc = A;
5017     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5018   }
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 /*@
5023      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5024           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5025           with MatGetSize()
5026 
5027     Not Collective
5028 
5029    Input Parameters:
5030 +    A - the matrix
5031 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 
5033    Output Parameter:
5034 .    A_loc - the local sequential matrix generated
5035 
5036     Level: developer
5037 
5038    Notes:
5039      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5040 
5041      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5042      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5043      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5044      modify the values of the returned A_loc.
5045 
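     A minimal sketch of the two reuse modes (illustrative; the second call assumes the nonzero
     pattern of A has not changed, and A_loc would normally be used between the calls):

.vb
     Mat A_loc;
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc));
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc));
     PetscCall(MatDestroy(&A_loc));
.ve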
5046 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5047 @*/
5048 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5049 {
5050   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5051   Mat_SeqAIJ        *mat,*a,*b;
5052   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5053   const PetscScalar *aa,*ba,*aav,*bav;
5054   PetscScalar       *ca,*cam;
5055   PetscMPIInt       size;
5056   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5057   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5058   PetscBool         match;
5059 
5060   PetscFunctionBegin;
5061   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
5062   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5063   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5064   if (size == 1) {
5065     if (scall == MAT_INITIAL_MATRIX) {
5066       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5067       *A_loc = mpimat->A;
5068     } else if (scall == MAT_REUSE_MATRIX) {
5069       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
5070     }
5071     PetscFunctionReturn(0);
5072   }
5073 
5074   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5075   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5076   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5077   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5078   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
5079   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5080   aa   = aav;
5081   ba   = bav;
5082   if (scall == MAT_INITIAL_MATRIX) {
5083     PetscCall(PetscMalloc1(1+am,&ci));
5084     ci[0] = 0;
5085     for (i=0; i<am; i++) {
5086       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5087     }
5088     PetscCall(PetscMalloc1(1+ci[am],&cj));
5089     PetscCall(PetscMalloc1(1+ci[am],&ca));
5090     k    = 0;
5091     for (i=0; i<am; i++) {
5092       ncols_o = bi[i+1] - bi[i];
5093       ncols_d = ai[i+1] - ai[i];
5094       /* off-diagonal portion of A */
5095       for (jo=0; jo<ncols_o; jo++) {
5096         col = cmap[*bj];
5097         if (col >= cstart) break;
5098         cj[k]   = col; bj++;
5099         ca[k++] = *ba++;
5100       }
5101       /* diagonal portion of A */
5102       for (j=0; j<ncols_d; j++) {
5103         cj[k]   = cstart + *aj++;
5104         ca[k++] = *aa++;
5105       }
5106       /* off-diagonal portion of A */
5107       for (j=jo; j<ncols_o; j++) {
5108         cj[k]   = cmap[*bj++];
5109         ca[k++] = *ba++;
5110       }
5111     }
5112     /* put together the new matrix */
5113     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5114     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5115     /* Since these are PETSc arrays, change flags to free them as necessary. */
5116     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5117     mat->free_a  = PETSC_TRUE;
5118     mat->free_ij = PETSC_TRUE;
5119     mat->nonew   = 0;
5120   } else if (scall == MAT_REUSE_MATRIX) {
5121     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5122     ci   = mat->i;
5123     cj   = mat->j;
5124     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
5125     for (i=0; i<am; i++) {
5126       /* off-diagonal portion of A */
5127       ncols_o = bi[i+1] - bi[i];
5128       for (jo=0; jo<ncols_o; jo++) {
5129         col = cmap[*bj];
5130         if (col >= cstart) break;
5131         *cam++ = *ba++; bj++;
5132       }
5133       /* diagonal portion of A */
5134       ncols_d = ai[i+1] - ai[i];
5135       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5136       /* off-diagonal portion of A */
5137       for (j=jo; j<ncols_o; j++) {
5138         *cam++ = *ba++; bj++;
5139       }
5140     }
5141     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
5142   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5143   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
5144   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
5145   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 /*@
5150      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5151           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5152 
5153     Not Collective
5154 
5155    Input Parameters:
5156 +    A - the matrix
5157 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158 
5159    Output Parameters:
5160 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5161 -    A_loc - the local sequential matrix generated
5162 
5163     Level: developer
5164 
5165    Notes:
5166      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5167 
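   Example Usage:
     A minimal sketch of a typical call sequence (illustrative only; error handling and the setup of A are omitted):

       Mat A_loc;
       IS  glob;

       PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc));
       // columns of A_loc are ordered with the diagonal part first, then the off-diagonal part;
       // glob maps those local columns back to global columns of A
       PetscCall(ISDestroy(&glob));
       PetscCall(MatDestroy(&A_loc));
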
5168 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5169 
5170 @*/
5171 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5172 {
5173   Mat            Ao,Ad;
5174   const PetscInt *cmap;
5175   PetscMPIInt    size;
5176   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5177 
5178   PetscFunctionBegin;
5179   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5180   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5181   if (size == 1) {
5182     if (scall == MAT_INITIAL_MATRIX) {
5183       PetscCall(PetscObjectReference((PetscObject)Ad));
5184       *A_loc = Ad;
5185     } else if (scall == MAT_REUSE_MATRIX) {
5186       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5187     }
5188     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5189     PetscFunctionReturn(0);
5190   }
5191   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5192   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5193   if (f) {
5194     PetscCall((*f)(A,scall,glob,A_loc));
5195   } else {
5196     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5197     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5198     Mat_SeqAIJ        *c;
5199     PetscInt          *ai = a->i, *aj = a->j;
5200     PetscInt          *bi = b->i, *bj = b->j;
5201     PetscInt          *ci,*cj;
5202     const PetscScalar *aa,*ba;
5203     PetscScalar       *ca;
5204     PetscInt          i,j,am,dn,on;
5205 
5206     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5207     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5208     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5209     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5210     if (scall == MAT_INITIAL_MATRIX) {
5211       PetscInt k;
5212       PetscCall(PetscMalloc1(1+am,&ci));
5213       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5214       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5215       ci[0] = 0;
5216       for (i=0,k=0; i<am; i++) {
5217         const PetscInt ncols_o = bi[i+1] - bi[i];
5218         const PetscInt ncols_d = ai[i+1] - ai[i];
5219         ci[i+1] = ci[i] + ncols_o + ncols_d;
5220         /* diagonal portion of A */
5221         for (j=0; j<ncols_d; j++,k++) {
5222           cj[k] = *aj++;
5223           ca[k] = *aa++;
5224         }
5225         /* off-diagonal portion of A */
5226         for (j=0; j<ncols_o; j++,k++) {
5227           cj[k] = dn + *bj++;
5228           ca[k] = *ba++;
5229         }
5230       }
5231       /* put together the new matrix */
5232       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5233       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5234       /* Since these are PETSc arrays, change flags to free them as necessary. */
5235       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5236       c->free_a  = PETSC_TRUE;
5237       c->free_ij = PETSC_TRUE;
5238       c->nonew   = 0;
5239       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5240     } else if (scall == MAT_REUSE_MATRIX) {
5241       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5242       for (i=0; i<am; i++) {
5243         const PetscInt ncols_d = ai[i+1] - ai[i];
5244         const PetscInt ncols_o = bi[i+1] - bi[i];
5245         /* diagonal portion of A */
5246         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5247         /* off-diagonal portion of A */
5248         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5249       }
5250       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5251     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5252     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5253     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5254     if (glob) {
5255       PetscInt cst, *gidx;
5256 
5257       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5258       PetscCall(PetscMalloc1(dn+on,&gidx));
5259       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5260       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5261       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5262     }
5263   }
5264   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5265   PetscFunctionReturn(0);
5266 }
5267 
5268 /*@C
5269      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5270 
5271     Not Collective
5272 
5273    Input Parameters:
5274 +    A - the matrix
5275 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5276 -    row, col - index sets of rows and columns to extract (or NULL)
5277 
5278    Output Parameter:
5279 .    A_loc - the local sequential matrix generated
5280 
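   Example Usage:
     A minimal sketch (illustrative only); passing NULL for row and col extracts all local rows and the nonzero columns:

       Mat A_loc;

       PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc));
       // ... use the condensed local matrix ...
       PetscCall(MatDestroy(&A_loc));
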
5281     Level: developer
5282 
5283 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5284 
5285 @*/
5286 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5287 {
5288   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5289   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5290   IS             isrowa,iscola;
5291   Mat            *aloc;
5292   PetscBool      match;
5293 
5294   PetscFunctionBegin;
5295   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5296   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5297   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5298   if (!row) {
5299     start = A->rmap->rstart; end = A->rmap->rend;
5300     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5301   } else {
5302     isrowa = *row;
5303   }
5304   if (!col) {
5305     start = A->cmap->rstart;
5306     cmap  = a->garray;
5307     nzA   = a->A->cmap->n;
5308     nzB   = a->B->cmap->n;
5309     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5310     ncols = 0;
5311     for (i=0; i<nzB; i++) {
5312       if (cmap[i] < start) idx[ncols++] = cmap[i];
5313       else break;
5314     }
5315     imark = i;
5316     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5317     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5318     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5319   } else {
5320     iscola = *col;
5321   }
5322   if (scall != MAT_INITIAL_MATRIX) {
5323     PetscCall(PetscMalloc1(1,&aloc));
5324     aloc[0] = *A_loc;
5325   }
5326   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5327   if (!col) { /* attach global id of condensed columns */
5328     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5329   }
5330   *A_loc = aloc[0];
5331   PetscCall(PetscFree(aloc));
5332   if (!row) {
5333     PetscCall(ISDestroy(&isrowa));
5334   }
5335   if (!col) {
5336     PetscCall(ISDestroy(&iscola));
5337   }
5338   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5339   PetscFunctionReturn(0);
5340 }
5341 
5342 /*
5343  * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
5344  * The row could be local or remote. The routine is designed to be scalable in memory, so that nothing depends
5345  * on a global size.
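 * Sketch of the implementation below: a first PetscSF broadcasts the per-row nonzero counts and offsets from the
 * owning ranks; two further PetscSFs (one for the diagonal block of P, one for the off-diagonal block) then
 * broadcast the column indices (converted to global indices) and the numerical values into the sequential matrix
 * P_oth, and are composed onto P_oth so later calls can reuse them.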
5346  * */
5347 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5348 {
5349   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5350   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5351   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5352   PetscMPIInt              owner;
5353   PetscSFNode              *iremote,*oiremote;
5354   const PetscInt           *lrowindices;
5355   PetscSF                  sf,osf;
5356   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5357   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5358   MPI_Comm                 comm;
5359   ISLocalToGlobalMapping   mapping;
5360   const PetscScalar        *pd_a,*po_a;
5361 
5362   PetscFunctionBegin;
5363   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
5364   /* plocalsize is the number of roots
5365    * nrows is the number of leaves
5366    * */
5367   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
5368   PetscCall(ISGetLocalSize(rows,&nrows));
5369   PetscCall(PetscCalloc1(nrows,&iremote));
5370   PetscCall(ISGetIndices(rows,&lrowindices));
5371   for (i=0;i<nrows;i++) {
5372     /* Find a remote index and an owner for a row
5373      * The row could be local or remote
5374      * */
5375     owner = 0;
5376     lidx  = 0;
5377     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
5378     iremote[i].index = lidx;
5379     iremote[i].rank  = owner;
5380   }
5381   /* Create SF to communicate how many nonzero columns for each row */
5382   PetscCall(PetscSFCreate(comm,&sf));
5383   /* SF will figure out the number of nonzero columns for each row, and their
5384    * offsets
5385    * */
5386   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5387   PetscCall(PetscSFSetFromOptions(sf));
5388   PetscCall(PetscSFSetUp(sf));
5389 
5390   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
5391   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
5392   PetscCall(PetscCalloc1(nrows,&pnnz));
5393   roffsets[0] = 0;
5394   roffsets[1] = 0;
5395   for (i=0;i<plocalsize;i++) {
5396     /* diag */
5397     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5398     /* off diag */
5399     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5400     /* compute offsets so that we know the relative location of each row */
5401     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5402     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5403   }
5404   PetscCall(PetscCalloc1(2*nrows,&nlcols));
5405   PetscCall(PetscCalloc1(2*nrows,&loffsets));
5406   /* 'r' means root, and 'l' means leaf */
5407   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5408   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5409   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5410   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5411   PetscCall(PetscSFDestroy(&sf));
5412   PetscCall(PetscFree(roffsets));
5413   PetscCall(PetscFree(nrcols));
5414   dntotalcols = 0;
5415   ontotalcols = 0;
5416   ncol = 0;
5417   for (i=0;i<nrows;i++) {
5418     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5419     ncol = PetscMax(pnnz[i],ncol);
5420     /* diag */
5421     dntotalcols += nlcols[i*2+0];
5422     /* off diag */
5423     ontotalcols += nlcols[i*2+1];
5424   }
5425   /* We do not need to figure out the right number of columns
5426    * since all the calculations will be done by going through the raw data
5427    * */
5428   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
5429   PetscCall(MatSetUp(*P_oth));
5430   PetscCall(PetscFree(pnnz));
5431   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5432   /* diag */
5433   PetscCall(PetscCalloc1(dntotalcols,&iremote));
5434   /* off diag */
5435   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
5436   /* diag */
5437   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
5438   /* off diag */
5439   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
5440   dntotalcols = 0;
5441   ontotalcols = 0;
5442   ntotalcols  = 0;
5443   for (i=0;i<nrows;i++) {
5444     owner = 0;
5445     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
5446     /* Set iremote for diag matrix */
5447     for (j=0;j<nlcols[i*2+0];j++) {
5448       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5449       iremote[dntotalcols].rank    = owner;
5450       /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */
5451       ilocal[dntotalcols++]        = ntotalcols++;
5452     }
5453     /* off diag */
5454     for (j=0;j<nlcols[i*2+1];j++) {
5455       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5456       oiremote[ontotalcols].rank    = owner;
5457       oilocal[ontotalcols++]        = ntotalcols++;
5458     }
5459   }
5460   PetscCall(ISRestoreIndices(rows,&lrowindices));
5461   PetscCall(PetscFree(loffsets));
5462   PetscCall(PetscFree(nlcols));
5463   PetscCall(PetscSFCreate(comm,&sf));
5464   /* P serves as the roots and P_oth as the leaves
5465    * Diag matrix
5466    * */
5467   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5468   PetscCall(PetscSFSetFromOptions(sf));
5469   PetscCall(PetscSFSetUp(sf));
5470 
5471   PetscCall(PetscSFCreate(comm,&osf));
5472   /* Off diag */
5473   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
5474   PetscCall(PetscSFSetFromOptions(osf));
5475   PetscCall(PetscSFSetUp(osf));
5476   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5477   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5478   /* We operate on the matrix internal data for saving memory */
5479   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5480   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5481   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
5482   /* Convert to global indices for diag matrix */
5483   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5484   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5485   /* We want P_oth to store global indices */
5486   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
5487   /* Use memory scalable approach */
5488   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
5489   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
5490   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5491   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5492   /* Convert back to local indices */
5493   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5494   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5495   nout = 0;
5496   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
5497   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5498   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5499   /* Exchange values */
5500   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5501   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5502   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5503   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5504   /* Stop PETSc from shrinking memory */
5505   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5506   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
5507   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
5508   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5509   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
5510   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
5511   PetscCall(PetscSFDestroy(&sf));
5512   PetscCall(PetscSFDestroy(&osf));
5513   PetscFunctionReturn(0);
5514 }
5515 
5516 /*
5517  * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of local A
5518  * This supports MPIAIJ and MAIJ
5519  * */
5520 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5521 {
5522   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5523   Mat_SeqAIJ            *p_oth;
5524   IS                    rows,map;
5525   PetscHMapI            hamp;
5526   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5527   MPI_Comm              comm;
5528   PetscSF               sf,osf;
5529   PetscBool             has;
5530 
5531   PetscFunctionBegin;
5532   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5533   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
5534   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5535    *  and then create a submatrix (that often is an overlapping matrix)
5536    * */
5537   if (reuse == MAT_INITIAL_MATRIX) {
5538     /* Use a hash table to figure out unique keys */
5539     PetscCall(PetscHMapICreate(&hamp));
5540     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
5541     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5542     count = 0;
5543     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5544     for (i=0;i<a->B->cmap->n;i++) {
5545       key  = a->garray[i]/dof;
5546       PetscCall(PetscHMapIHas(hamp,key,&has));
5547       if (!has) {
5548         mapping[i] = count;
5549         PetscCall(PetscHMapISet(hamp,key,count++));
5550       } else {
5551         /* Current 'i' has the same value the previous step */
5552         /* Current 'i' has the same value as in the previous step */
5553       }
5554     }
5555     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
5556     PetscCall(PetscHMapIGetSize(hamp,&htsize));
5557     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5558     PetscCall(PetscCalloc1(htsize,&rowindices));
5559     off = 0;
5560     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
5561     PetscCall(PetscHMapIDestroy(&hamp));
5562     PetscCall(PetscSortInt(htsize,rowindices));
5563     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
5564     /* In case the matrix was already created but the user wants to recreate it */
5565     PetscCall(MatDestroy(P_oth));
5566     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
5567     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
5568     PetscCall(ISDestroy(&map));
5569     PetscCall(ISDestroy(&rows));
5570   } else if (reuse == MAT_REUSE_MATRIX) {
5571     /* If the matrix was already created, we simply update its values using the SF objects
5572      * that were attached to the matrix earlier.
5573      */
5574     const PetscScalar *pd_a,*po_a;
5575 
5576     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
5577     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
5578     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5579     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5580     /* Update values in place */
5581     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5582     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5583     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5584     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5585     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5586     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5587     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5588     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5589   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5590   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
5591   PetscFunctionReturn(0);
5592 }
5593 
5594 /*@C
5595   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A
5596 
5597   Collective on Mat
5598 
5599   Input Parameters:
5600 + A - the first matrix in mpiaij format
5601 . B - the second matrix in mpiaij format
5602 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5603 
5604   Output Parameters:
5605 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5606 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5607 - B_seq - the sequential matrix generated
5608 
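  Example Usage:
    A minimal sketch (illustrative only); rowb and colb are created on the first call and must be passed back unchanged for reuse:

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;

      PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq));
      // ... numerical values of B change while its nonzero pattern stays the same ...
      PetscCall(MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq));
      // destroy rowb, colb and B_seq when they are no longer needed
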
5609   Level: developer
5610 
5611 @*/
5612 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5613 {
5614   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5615   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5616   IS             isrowb,iscolb;
5617   Mat            *bseq=NULL;
5618 
5619   PetscFunctionBegin;
5620   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5621     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5622   }
5623   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5624 
5625   if (scall == MAT_INITIAL_MATRIX) {
5626     start = A->cmap->rstart;
5627     cmap  = a->garray;
5628     nzA   = a->A->cmap->n;
5629     nzB   = a->B->cmap->n;
5630     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5631     ncols = 0;
5632     for (i=0; i<nzB; i++) {  /* row < local row index */
5633       if (cmap[i] < start) idx[ncols++] = cmap[i];
5634       else break;
5635     }
5636     imark = i;
5637     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5638     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5639     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5640     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5641   } else {
5642     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5643     isrowb  = *rowb; iscolb = *colb;
5644     PetscCall(PetscMalloc1(1,&bseq));
5645     bseq[0] = *B_seq;
5646   }
5647   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5648   *B_seq = bseq[0];
5649   PetscCall(PetscFree(bseq));
5650   if (!rowb) {
5651     PetscCall(ISDestroy(&isrowb));
5652   } else {
5653     *rowb = isrowb;
5654   }
5655   if (!colb) {
5656     PetscCall(ISDestroy(&iscolb));
5657   } else {
5658     *colb = iscolb;
5659   }
5660   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5661   PetscFunctionReturn(0);
5662 }
5663 
5664 /*
5665     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns
5666     of the OFF-DIAGONAL portion of local A
5667 
5668     Collective on Mat
5669 
5670    Input Parameters:
5671 +    A,B - the matrices in mpiaij format
5672 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5673 
5674    Output Parameters:
5675 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5676 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5677 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5678 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5679 
5680     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5681      for this matrix. This is not desirable.
5682 
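   Example Usage:
     A hedged sketch of the intended call pattern (not taken verbatim from a caller); startsj_s, startsj_r and bufa
     are created on the first call and passed back in so the communication pattern can be reused:

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa = NULL;
       Mat       B_oth;

       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth));
       // ... numerical values of B change while its nonzero pattern stays the same ...
       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth));
       // the caller eventually frees startsj_s, startsj_r and bufa, and destroys B_oth
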
5683     Level: developer
5684 
5685 */
5686 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5687 {
5688   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5689   Mat_SeqAIJ             *b_oth;
5690   VecScatter             ctx;
5691   MPI_Comm               comm;
5692   const PetscMPIInt      *rprocs,*sprocs;
5693   const PetscInt         *srow,*rstarts,*sstarts;
5694   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5695   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5696   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5697   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5698   PetscMPIInt            size,tag,rank,nreqs;
5699 
5700   PetscFunctionBegin;
5701   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5702   PetscCallMPI(MPI_Comm_size(comm,&size));
5703 
5704   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5705     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5706   }
5707   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5708   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5709 
5710   if (size == 1) {
5711     startsj_s = NULL;
5712     bufa_ptr  = NULL;
5713     *B_oth    = NULL;
5714     PetscFunctionReturn(0);
5715   }
5716 
5717   ctx = a->Mvctx;
5718   tag = ((PetscObject)ctx)->tag;
5719 
5720   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5721   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5722   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5723   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5724   PetscCall(PetscMalloc1(nreqs,&reqs));
5725   rwaits = reqs;
5726   swaits = reqs + nrecvs;
5727 
5728   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5729   if (scall == MAT_INITIAL_MATRIX) {
5730     /* i-array */
5731     /*---------*/
5732     /*  post receives */
5733     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5734     for (i=0; i<nrecvs; i++) {
5735       rowlen = rvalues + rstarts[i]*rbs;
5736       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5737       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5738     }
5739 
5740     /* pack the outgoing message */
5741     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5742 
5743     sstartsj[0] = 0;
5744     rstartsj[0] = 0;
5745     len         = 0; /* total length of j or a array to be sent */
5746     if (nsends) {
5747       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5748       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5749     }
5750     for (i=0; i<nsends; i++) {
5751       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5752       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5753       for (j=0; j<nrows; j++) {
5754         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5755         for (l=0; l<sbs; l++) {
5756           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5757 
5758           rowlen[j*sbs+l] = ncols;
5759 
5760           len += ncols;
5761           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5762         }
5763         k++;
5764       }
5765       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5766 
5767       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5768     }
5769     /* recvs and sends of i-array are completed */
5770     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5771     PetscCall(PetscFree(svalues));
5772 
5773     /* allocate buffers for sending j and a arrays */
5774     PetscCall(PetscMalloc1(len+1,&bufj));
5775     PetscCall(PetscMalloc1(len+1,&bufa));
5776 
5777     /* create i-array of B_oth */
5778     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5779 
5780     b_othi[0] = 0;
5781     len       = 0; /* total length of j or a array to be received */
5782     k         = 0;
5783     for (i=0; i<nrecvs; i++) {
5784       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5785       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5786       for (j=0; j<nrows; j++) {
5787         b_othi[k+1] = b_othi[k] + rowlen[j];
5788         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5789         k++;
5790       }
5791       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5792     }
5793     PetscCall(PetscFree(rvalues));
5794 
5795     /* allocate space for j and a arrays of B_oth */
5796     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5797     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5798 
5799     /* j-array */
5800     /*---------*/
5801     /*  post receives of j-array */
5802     for (i=0; i<nrecvs; i++) {
5803       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5804       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5805     }
5806 
5807     /* pack the outgoing message j-array */
5808     if (nsends) k = sstarts[0];
5809     for (i=0; i<nsends; i++) {
5810       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5811       bufJ  = bufj+sstartsj[i];
5812       for (j=0; j<nrows; j++) {
5813         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5814         for (ll=0; ll<sbs; ll++) {
5815           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5816           for (l=0; l<ncols; l++) {
5817             *bufJ++ = cols[l];
5818           }
5819           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5820         }
5821       }
5822       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5823     }
5824 
5825     /* recvs and sends of j-array are completed */
5826     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5827   } else if (scall == MAT_REUSE_MATRIX) {
5828     sstartsj = *startsj_s;
5829     rstartsj = *startsj_r;
5830     bufa     = *bufa_ptr;
5831     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5832     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5833   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5834 
5835   /* a-array */
5836   /*---------*/
5837   /*  post receives of a-array */
5838   for (i=0; i<nrecvs; i++) {
5839     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5840     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5841   }
5842 
5843   /* pack the outgoing message a-array */
5844   if (nsends) k = sstarts[0];
5845   for (i=0; i<nsends; i++) {
5846     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5847     bufA  = bufa+sstartsj[i];
5848     for (j=0; j<nrows; j++) {
5849       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5850       for (ll=0; ll<sbs; ll++) {
5851         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5852         for (l=0; l<ncols; l++) {
5853           *bufA++ = vals[l];
5854         }
5855         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5856       }
5857     }
5858     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5859   }
5860   /* recvs and sends of a-array are completed */
5861   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5862   PetscCall(PetscFree(reqs));
5863 
5864   if (scall == MAT_INITIAL_MATRIX) {
5865     /* put together the new matrix */
5866     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5867 
5868     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5869     /* Since these are PETSc arrays, change flags to free them as necessary. */
5870     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5871     b_oth->free_a  = PETSC_TRUE;
5872     b_oth->free_ij = PETSC_TRUE;
5873     b_oth->nonew   = 0;
5874 
5875     PetscCall(PetscFree(bufj));
5876     if (!startsj_s || !bufa_ptr) {
5877       PetscCall(PetscFree2(sstartsj,rstartsj));
5878       PetscCall(PetscFree(bufa_ptr));
5879     } else {
5880       *startsj_s = sstartsj;
5881       *startsj_r = rstartsj;
5882       *bufa_ptr  = bufa;
5883     }
5884   } else if (scall == MAT_REUSE_MATRIX) {
5885     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5886   }
5887 
5888   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5889   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5890   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5891   PetscFunctionReturn(0);
5892 }
5893 
5894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5897 #if defined(PETSC_HAVE_MKL_SPARSE)
5898 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5899 #endif
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5902 #if defined(PETSC_HAVE_ELEMENTAL)
5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5904 #endif
5905 #if defined(PETSC_HAVE_SCALAPACK)
5906 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5907 #endif
5908 #if defined(PETSC_HAVE_HYPRE)
5909 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5910 #endif
5911 #if defined(PETSC_HAVE_CUDA)
5912 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5913 #endif
5914 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5916 #endif
5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5918 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5919 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5920 
5921 /*
5922     Computes C = A*B as (B'*A')' since computing the product directly is untenable
5923 
5924                n                       p                          p
5925         [             ]       [             ]         [                 ]
5926       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5927         [             ]       [             ]         [                 ]
5928 
5929 */
5930 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5931 {
5932   Mat            At,Bt,Ct;
5933 
5934   PetscFunctionBegin;
5935   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5936   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5937   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5938   PetscCall(MatDestroy(&At));
5939   PetscCall(MatDestroy(&Bt));
5940   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5941   PetscCall(MatDestroy(&Ct));
5942   PetscFunctionReturn(0);
5943 }
5944 
5945 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5946 {
5947   PetscBool      cisdense;
5948 
5949   PetscFunctionBegin;
5950   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5951   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5952   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5953   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5954   if (!cisdense) {
5955     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5956   }
5957   PetscCall(MatSetUp(C));
5958 
5959   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5960   PetscFunctionReturn(0);
5961 }
5962 
5963 /* ----------------------------------------------------------------*/
5964 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5965 {
5966   Mat_Product *product = C->product;
5967   Mat         A = product->A,B=product->B;
5968 
5969   PetscFunctionBegin;
5970   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5971     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5972 
5973   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5974   C->ops->productsymbolic = MatProductSymbolic_AB;
5975   PetscFunctionReturn(0);
5976 }
5977 
5978 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5979 {
5980   Mat_Product    *product = C->product;
5981 
5982   PetscFunctionBegin;
5983   if (product->type == MATPRODUCT_AB) {
5984     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5985   }
5986   PetscFunctionReturn(0);
5987 }
5988 
5989 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5990 
5991   Input Parameters:
5992 
5993     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5994     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5995 
5996     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5997 
5998     For Set1, j1[] contains column indices of the nonzeros.
5999     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6000     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6001     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6002 
6003     Similar for Set2.
6004 
6005     This routine merges the two sets of nonzeros row by row and removes repeats.
6006 
6007   Output Parameters: (memory is allocated by the caller)
6008 
6009     i[],j[]: the CSR of the merged matrix, which has m rows.
6010     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6011     imap2[]: similar to imap1[], but for Set2.
6012     Note we order nonzeros row-by-row and from left to right.
6013     Note we order nonzeros row-by-row and from left to right. */
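/* A small worked example of the merge for a single row (hypothetical values; assume it is the first row, so the
   merged-nonzero counter starts at 0):
     Set1: j1 over [rowBegin1[r],rowEnd1[r]) = {2,2,5}, jmap1 = [0,2,3]  (unique columns 2 and 5, with 2 and 1 repeats)
     Set2: j2 over [rowBegin2[r],rowEnd2[r]) = {3,5},   jmap2 = [0,1,2]  (unique columns 3 and 5, with 1 repeat each)
   The merged row is j = {2,3,5}, hence imap1 = {0,2} and imap2 = {1,2}.
*/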
6014 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6015   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6016   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6017 {
6018   PetscInt       r,m; /* Row index of mat */
6019   PetscCount     t,t1,t2,b1,e1,b2,e2;
6020 
6021   PetscFunctionBegin;
6022   PetscCall(MatGetLocalSize(mat,&m,NULL));
6023   t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6024   i[0] = 0;
6025   for (r=0; r<m; r++) { /* Do row by row merging */
6026     b1   = rowBegin1[r];
6027     e1   = rowEnd1[r];
6028     b2   = rowBegin2[r];
6029     e2   = rowEnd2[r];
6030     while (b1 < e1 && b2 < e2) {
6031       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6032         j[t]      = j1[b1];
6033         imap1[t1] = t;
6034         imap2[t2] = t;
6035         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6036         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6037         t1++; t2++; t++;
6038       } else if (j1[b1] < j2[b2]) {
6039         j[t]      = j1[b1];
6040         imap1[t1] = t;
6041         b1       += jmap1[t1+1] - jmap1[t1];
6042         t1++; t++;
6043       } else {
6044         j[t]      = j2[b2];
6045         imap2[t2] = t;
6046         b2       += jmap2[t2+1] - jmap2[t2];
6047         t2++; t++;
6048       }
6049     }
6050     /* Merge the remaining in either j1[] or j2[] */
6051     while (b1 < e1) {
6052       j[t]      = j1[b1];
6053       imap1[t1] = t;
6054       b1       += jmap1[t1+1] - jmap1[t1];
6055       t1++; t++;
6056     }
6057     while (b2 < e2) {
6058       j[t]      = j2[b2];
6059       imap2[t2] = t;
6060       b2       += jmap2[t2+1] - jmap2[t2];
6061       t2++; t++;
6062     }
6063     i[r+1] = t;
6064   }
6065   PetscFunctionReturn(0);
6066 }
6067 
6068 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6069 
6070   Input Parameters:
6071     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6072     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6073       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6074 
6075       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6076       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6077 
6078   Output Parameters:
6079     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6080     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6081       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6082       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6083 
6084     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6085       Atot: number of entries belonging to the diagonal block.
6086       Annz: number of unique nonzeros belonging to the diagonal block.
6087       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6088         repeats (i.e., same 'i,j' pair).
6089       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6090         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6091 
6095     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6096 
6097     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
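
    A small illustrative example for one local row (hypothetical values), assuming the owned column range is [cstart,cend) = [10,20):
      input entries of the row (unsorted, with repeats): j = {25,12,3,12}
      after the in-row sort the diagonal-block columns come first: j = {12,12,3,25}, and rowMid[] splits the row after the first two entries;
      diagonal block:     Atot grows by 2 and Annz by 1 (one unique column, 12, giving an Ajmap increment of 2),
      off-diagonal block: Btot grows by 2 and Bnnz by 2 (unique columns 3 and 25, each giving a Bjmap increment of 1),
      Aperm[] receives the perm[] values of the two entries with column 12, Bperm[] those of the entries with columns 3 and 25.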
6098 */
6099 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6100   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6101   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6102   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6103 {
6104   PetscInt          cstart,cend,rstart,rend,row,col;
6105   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6106   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6107   PetscCount        k,m,p,q,r,s,mid;
6108   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6109 
6110   PetscFunctionBegin;
6111   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6112   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6113   m    = rend - rstart;
6114 
6115   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6116 
6117   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6118      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6119   */
6120   while (k<n) {
6121     row = i[k];
6122     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6123     for (s=k; s<n; s++) if (i[s] != row) break;
6124     for (p=k; p<s; p++) {
6125       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6126       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6127     }
6128     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6129     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6130     rowBegin[row-rstart] = k;
6131     rowMid[row-rstart]   = mid;
6132     rowEnd[row-rstart]   = s;
6133 
6134     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6135     Atot += mid - k;
6136     Btot += s - mid;
6137 
6138     /* Count unique nonzeros of this diag/offdiag row */
6139     for (p=k; p<mid;) {
6140       col = j[p];
6141       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6142       Annz++;
6143     }
6144 
6145     for (p=mid; p<s;) {
6146       col = j[p];
6147       do {p++;} while (p<s && j[p] == col);
6148       Bnnz++;
6149     }
6150     k = s;
6151   }
6152 
6153   /* Allocation according to Atot, Btot, Annz, Bnnz */
6154   PetscCall(PetscMalloc1(Atot,&Aperm));
6155   PetscCall(PetscMalloc1(Btot,&Bperm));
6156   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6157   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6158 
6159   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6160   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6161   for (r=0; r<m; r++) {
6162     k     = rowBegin[r];
6163     mid   = rowMid[r];
6164     s     = rowEnd[r];
6165     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6166     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6167     Atot += mid - k;
6168     Btot += s - mid;
6169 
6170     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6171     for (p=k; p<mid;) {
6172       col = j[p];
6173       q   = p;
6174       do {p++;} while (p<mid && j[p] == col);
6175       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6176       Annz++;
6177     }
6178 
6179     for (p=mid; p<s;) {
6180       col = j[p];
6181       q   = p;
6182       do {p++;} while (p<s && j[p] == col);
6183       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6184       Bnnz++;
6185     }
6186   }
6187   /* Output */
6188   *Aperm_ = Aperm;
6189   *Annz_  = Annz;
6190   *Atot_  = Atot;
6191   *Ajmap_ = Ajmap;
6192   *Bperm_ = Bperm;
6193   *Bnnz_  = Bnnz;
6194   *Btot_  = Btot;
6195   *Bjmap_ = Bjmap;
6196   PetscFunctionReturn(0);
6197 }
6198 
6199 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6200 
6201   Input Parameters:
6202     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6203     nnz:  number of unique nonzeros in the merged matrix
6204     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6205     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6206 
6207   Output Parameter: (memory is allocated by the caller)
6208     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6209 
6210   Example:
6211     nnz1 = 4
6212     nnz  = 6
6213     imap = [1,3,4,5]
6214     jmap = [0,3,5,6,7]
6215    then,
6216     jmap_new = [0,0,3,3,5,6,7]
6217     jmap_new = [0,0,3,3,5,6,7] */
6218 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6219 {
6220   PetscCount k,p;
6221 
6222   PetscFunctionBegin;
6223   jmap_new[0] = 0;
6224   p = nnz; /* p loops over jmap_new[] backwards */
6225   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6226     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6227   }
6228   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6229   PetscFunctionReturn(0);
6230 }
6231 
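/* A minimal, illustrative sketch (hypothetical indices and values) of the user-facing COO assembly path that ends up
   in this routine for MATMPIAIJ matrices:

     PetscInt    oi[] = {0,0,1},  oj[] = {0,2,1};   // global (row,col) pairs; repeats and off-process entries are allowed
     PetscScalar ov[] = {1.0,2.0,3.0};

     PetscCall(MatSetPreallocationCOO(A,3,oi,oj));  // analyzes the nonzero pattern (dispatches here for MPIAIJ)
     PetscCall(MatSetValuesCOO(A,ov,ADD_VALUES));   // fills the numerical values; repeated (i,j) entries are summed
*/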
6232 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6233 {
6234   MPI_Comm                  comm;
6235   PetscMPIInt               rank,size;
6236   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6237   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6238   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6239 
6240   PetscFunctionBegin;
6241   PetscCall(PetscFree(mpiaij->garray));
6242   PetscCall(VecDestroy(&mpiaij->lvec));
6243 #if defined(PETSC_USE_CTABLE)
6244   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6245 #else
6246   PetscCall(PetscFree(mpiaij->colmap));
6247 #endif
6248   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6249   mat->assembled = PETSC_FALSE;
6250   mat->was_assembled = PETSC_FALSE;
6251   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6252 
6253   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6254   PetscCallMPI(MPI_Comm_size(comm,&size));
6255   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6256   PetscCall(PetscLayoutSetUp(mat->rmap));
6257   PetscCall(PetscLayoutSetUp(mat->cmap));
6258   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6259   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6260   PetscCall(MatGetLocalSize(mat,&m,&n));
6261   PetscCall(MatGetSize(mat,&M,&N));
6262 
6263   /* ---------------------------------------------------------------------------*/
6264   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6265   /* entries come first, then local rows, then remote rows.                     */
6266   /* ---------------------------------------------------------------------------*/
6267   PetscCount n1 = coo_n,*perm1;
6268   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6269   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6270   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6271   PetscCall(PetscArraycpy(j1,coo_j,n1));
6272   for (k=0; k<n1; k++) perm1[k] = k;
6273 
6274   /* Manipulate indices so that entries with negative row or col indices will have smallest
6275      row indices, local entries will have greater but negative row indices, and remote entries
6276      will have positive row indices.
6277   */
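  /* Illustration with hypothetical numbers: with rstart=10 and rend=20, an entry with i1[k] = -1 becomes PETSC_MIN_INT,
     a local entry with i1[k] = 12 becomes 12-PETSC_MAX_INT (negative, but larger than PETSC_MIN_INT), and a remote entry
     with i1[k] = 25 keeps its positive value, so the sort below orders ignored, local and remote entries in that order. */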
6278   for (k=0; k<n1; k++) {
6279     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6280     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6281     else {
6282       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but entries are inserted into remote rows");
6283       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6284     }
6285   }
6286 
6287   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6288   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6289   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6290   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6291   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6292 
6293   /* ---------------------------------------------------------------------------*/
6294   /*           Split local rows into diag/offdiag portions                      */
6295   /* ---------------------------------------------------------------------------*/
6296   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6297   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6298   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6299 
6300   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6301   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6302   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6303 
6304   /* ---------------------------------------------------------------------------*/
6305   /*           Send remote rows to their owner                                  */
6306   /* ---------------------------------------------------------------------------*/
6307   /* Find which rows should be sent to which remote ranks*/
6308   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6309   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6310   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6311   const PetscInt *ranges;
6312   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6313 
6314   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6315   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6316   for (k=rem; k<n1;) {
6317     PetscMPIInt  owner;
6318     PetscInt     firstRow,lastRow;
6319 
6320     /* Locate a row range */
6321     firstRow = i1[k]; /* first row of this owner */
6322     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6323     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6324 
6325     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6326     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6327 
6328     /* All entries in [k,p) belong to this remote owner */
6329     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6330       PetscMPIInt *sendto2;
6331       PetscInt    *nentries2;
6332       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6333 
6334       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6335       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6336       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend));
6337       PetscCall(PetscFree2(sendto,nentries));
6338       sendto      = sendto2;
6339       nentries    = nentries2;
6340       maxNsend    = maxNsend2;
6341     }
6342     sendto[nsend]   = owner;
6343     nentries[nsend] = p - k;
6344     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6345     nsend++;
6346     k = p;
6347   }
6348 
6349   /* Build 1st SF to know offsets on remote to send data */
6350   PetscSF     sf1;
6351   PetscInt    nroots = 1,nroots2 = 0;
6352   PetscInt    nleaves = nsend,nleaves2 = 0;
6353   PetscInt    *offsets;
6354   PetscSFNode *iremote;
6355 
6356   PetscCall(PetscSFCreate(comm,&sf1));
6357   PetscCall(PetscMalloc1(nsend,&iremote));
6358   PetscCall(PetscMalloc1(nsend,&offsets));
6359   for (k=0; k<nsend; k++) {
6360     iremote[k].rank  = sendto[k];
6361     iremote[k].index = 0;
6362     nleaves2        += nentries[k];
6363     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6364   }
6365   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6366   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6367   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* If nroots2 overflowed, we would catch it via the offsets[] check below */
6368   PetscCall(PetscSFDestroy(&sf1));
6369   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6370 
6371   /* Build 2nd SF to send remote COOs to their owner */
6372   PetscSF sf2;
6373   nroots  = nroots2;
6374   nleaves = nleaves2;
6375   PetscCall(PetscSFCreate(comm,&sf2));
6376   PetscCall(PetscSFSetFromOptions(sf2));
6377   PetscCall(PetscMalloc1(nleaves,&iremote));
6378   p       = 0;
6379   for (k=0; k<nsend; k++) {
6380     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6381     for (q=0; q<nentries[k]; q++,p++) {
6382       iremote[p].rank  = sendto[k];
6383       iremote[p].index = offsets[k] + q;
6384     }
6385   }
6386   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6387 
6388   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6389   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6390 
6391   /* Send the remote COOs to their owner */
6392   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6393   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6394   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6395   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6396   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6397   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6398   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6399 
6400   PetscCall(PetscFree(offsets));
6401   PetscCall(PetscFree2(sendto,nentries));
6402 
6403   /* ---------------------------------------------------------------*/
6404   /* Sort received COOs by row along with the permutation array     */
6405   /* ---------------------------------------------------------------*/
6406   for (k=0; k<n2; k++) perm2[k] = k;
6407   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6408 
6409   /* ---------------------------------------------------------------*/
6410   /* Split received COOs into diag/offdiag portions                 */
6411   /* ---------------------------------------------------------------*/
6412   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6413   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6414   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6415 
6416   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6417   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6418 
6419   /* --------------------------------------------------------------------------*/
6420   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6421   /* --------------------------------------------------------------------------*/
6422   PetscInt   *Ai,*Bi;
6423   PetscInt   *Aj,*Bj;
6424 
6425   PetscCall(PetscMalloc1(m+1,&Ai));
6426   PetscCall(PetscMalloc1(m+1,&Bi));
6427   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6428   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6429 
6430   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6431   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6432   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6433   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6434   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6435 
6436   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6437   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
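  /* Ai/Aj (resp. Bi/Bj) now hold the merged, duplicate-free CSR pattern of the diagonal (resp. off-diagonal) block;
     Aimap1/Aimap2 (resp. Bimap1/Bimap2) map each unique local/remote entry to its row-wise position in that pattern */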
6438 
6439   /* --------------------------------------------------------------------------*/
6440   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since we  */
6441   /* expect most nonzeros in A/B to have local contributing entries                  */
6442   /* --------------------------------------------------------------------------*/
6443   PetscInt Annz = Ai[m];
6444   PetscInt Bnnz = Bi[m];
6445   PetscCount *Ajmap1_new,*Bjmap1_new;
6446 
6447   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6448   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6449 
6450   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6451   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6452 
6453   PetscCall(PetscFree(Aimap1));
6454   PetscCall(PetscFree(Ajmap1));
6455   PetscCall(PetscFree(Bimap1));
6456   PetscCall(PetscFree(Bjmap1));
6457   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6458   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6459   PetscCall(PetscFree3(i1,j1,perm1));
6460   PetscCall(PetscFree3(i2,j2,perm2));
6461 
6462   Ajmap1 = Ajmap1_new;
6463   Bjmap1 = Bjmap1_new;
6464 
6465   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6466   if (Annz < Annz1 + Annz2) {
6467     PetscInt *Aj_new;
6468     PetscCall(PetscMalloc1(Annz,&Aj_new));
6469     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6470     PetscCall(PetscFree(Aj));
6471     Aj   = Aj_new;
6472   }
6473 
6474   if (Bnnz < Bnnz1 + Bnnz2) {
6475     PetscInt *Bj_new;
6476     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6477     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6478     PetscCall(PetscFree(Bj));
6479     Bj   = Bj_new;
6480   }
6481 
6482   /* --------------------------------------------------------------------------------*/
6483   /* Create new submatrices for on-process and off-process coupling                  */
6484   /* --------------------------------------------------------------------------------*/
6485   PetscScalar   *Aa,*Ba;
6486   MatType       rtype;
6487   Mat_SeqAIJ    *a,*b;
6488   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6489   PetscCall(PetscCalloc1(Bnnz,&Ba));
6490   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6491   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6492   PetscCall(MatDestroy(&mpiaij->A));
6493   PetscCall(MatDestroy(&mpiaij->B));
6494   PetscCall(MatGetRootType_Private(mat,&rtype));
6495   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6496   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6497   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6498 
6499   a = (Mat_SeqAIJ*)mpiaij->A->data;
6500   b = (Mat_SeqAIJ*)mpiaij->B->data;
6501   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6502   a->free_a       = b->free_a       = PETSC_TRUE;
6503   a->free_ij      = b->free_ij      = PETSC_TRUE;
6504 
6505   /* conversion must happen AFTER multiply setup */
6506   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6507   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6508   PetscCall(VecDestroy(&mpiaij->lvec));
6509   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6510   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6511 
6512   mpiaij->coo_n   = coo_n;
6513   mpiaij->coo_sf  = sf2;
6514   mpiaij->sendlen = nleaves;
6515   mpiaij->recvlen = nroots;
6516 
6517   mpiaij->Annz    = Annz;
6518   mpiaij->Bnnz    = Bnnz;
6519 
6520   mpiaij->Annz2   = Annz2;
6521   mpiaij->Bnnz2   = Bnnz2;
6522 
6523   mpiaij->Atot1   = Atot1;
6524   mpiaij->Atot2   = Atot2;
6525   mpiaij->Btot1   = Btot1;
6526   mpiaij->Btot2   = Btot2;
6527 
6528   mpiaij->Ajmap1  = Ajmap1;
6529   mpiaij->Aperm1  = Aperm1;
6530 
6531   mpiaij->Bjmap1  = Bjmap1;
6532   mpiaij->Bperm1  = Bperm1;
6533 
6534   mpiaij->Aimap2  = Aimap2;
6535   mpiaij->Ajmap2  = Ajmap2;
6536   mpiaij->Aperm2  = Aperm2;
6537 
6538   mpiaij->Bimap2  = Bimap2;
6539   mpiaij->Bjmap2  = Bjmap2;
6540   mpiaij->Bperm2  = Bperm2;
6541 
6542   mpiaij->Cperm1  = Cperm1;
6543 
6544   /* Allocate the send/recv buffers here in preallocation. If not used, they have zero cost on the host */
6545   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6546   PetscFunctionReturn(0);
6547 }
6548 
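/* Insert/add numerical COO values using the maps built by MatSetPreallocationCOO_MPIAIJ():
   for the i-th nonzero of the diagonal block A, the input entries v[Aperm1[k]] with Ajmap1[i] <= k < Ajmap1[i+1]
   are accumulated into Aa[i]; Aimap2/Ajmap2/Aperm2 play the same role for values received from other ranks
   (recvbuf), and the B* counterparts do the same for the off-diagonal block.

   A sketch of the user-level entry points this implements (names as in the public API):
     MatSetPreallocationCOO(A,ncoo,coo_i,coo_j);
     MatSetValuesCOO(A,coo_v,ADD_VALUES);
*/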
6549 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6550 {
6551   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6552   Mat                  A = mpiaij->A,B = mpiaij->B;
6553   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6554   PetscScalar          *Aa,*Ba;
6555   PetscScalar          *sendbuf = mpiaij->sendbuf;
6556   PetscScalar          *recvbuf = mpiaij->recvbuf;
6557   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6558   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6559   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6560   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6561 
6562   PetscFunctionBegin;
6563   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6564   PetscCall(MatSeqAIJGetArray(B,&Ba));
6565 
6566   /* Pack entries to be sent to remote */
6567   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6568 
6569   /* Send remote entries to their owner and overlap the communication with local computation */
6570   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6571   /* Add local entries to A and B */
6572   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zeroed or added with a value (i.e., initialized) */
6573     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6574     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6575     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6576   }
6577   for (PetscCount i=0; i<Bnnz; i++) {
6578     PetscScalar sum = 0.0;
6579     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6580     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6581   }
6582   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6583 
6584   /* Add received remote entries to A and B */
6585   for (PetscCount i=0; i<Annz2; i++) {
6586     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6587   }
6588   for (PetscCount i=0; i<Bnnz2; i++) {
6589     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6590   }
6591   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6592   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6593   PetscFunctionReturn(0);
6594 }
6595 
6596 /* ----------------------------------------------------------------*/
6597 
6598 /*MC
6599    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6600 
6601    Options Database Keys:
6602 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6603 
6604    Level: beginner
6605 
6606    Notes:
6607     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6608     in this case the values associated with the rows and columns one passes in are set to zero
6609     in the matrix.
6610 
6611     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6612     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6613 
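    Example usage (a minimal creation sketch; the per-row preallocation estimates below are only illustrative):
.vb
    MatCreate(comm,&A);
    MatSetSizes(A,m,n,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,5,NULL,2,NULL); /* rough per-row counts for the diagonal and off-diagonal blocks */
    /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
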
6614 .seealso: `MatCreateAIJ()`
6615 M*/
6616 
6617 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6618 {
6619   Mat_MPIAIJ     *b;
6620   PetscMPIInt    size;
6621 
6622   PetscFunctionBegin;
6623   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6624 
6625   PetscCall(PetscNewLog(B,&b));
6626   B->data       = (void*)b;
6627   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6628   B->assembled  = PETSC_FALSE;
6629   B->insertmode = NOT_SET_VALUES;
6630   b->size       = size;
6631 
6632   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6633 
6634   /* build cache for off array entries formed */
6635   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6636 
6637   b->donotstash  = PETSC_FALSE;
6638   b->colmap      = NULL;
6639   b->garray      = NULL;
6640   b->roworiented = PETSC_TRUE;
6641 
6642   /* stuff used for matrix vector multiply */
6643   b->lvec  = NULL;
6644   b->Mvctx = NULL;
6645 
6646   /* stuff for MatGetRow() */
6647   b->rowindices   = NULL;
6648   b->rowvalues    = NULL;
6649   b->getrowactive = PETSC_FALSE;
6650 
6651   /* flexible pointer used in CUSPARSE classes */
6652   b->spptr = NULL;
6653 
6654   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6655   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6656   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6657   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6658   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6659   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6660   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6661   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6663   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6664 #if defined(PETSC_HAVE_CUDA)
6665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6666 #endif
6667 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6669 #endif
6670 #if defined(PETSC_HAVE_MKL_SPARSE)
6671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6672 #endif
6673   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6674   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6675   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6676   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6677 #if defined(PETSC_HAVE_ELEMENTAL)
6678   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6679 #endif
6680 #if defined(PETSC_HAVE_SCALAPACK)
6681   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6682 #endif
6683   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6684   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6685 #if defined(PETSC_HAVE_HYPRE)
6686   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6687   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6688 #endif
6689   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6690   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6691   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6692   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6693   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6694   PetscFunctionReturn(0);
6695 }
6696 
6697 /*@C
6698      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6699          and "off-diagonal" part of the matrix in CSR format.
6700 
6701    Collective
6702 
6703    Input Parameters:
6704 +  comm - MPI communicator
6705 .  m - number of local rows (Cannot be PETSC_DECIDE)
6706 .  n - This value should be the same as the local size used in creating the
6707        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6708        calculated if N is given). For square matrices n is almost always m.
6709 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6710 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6711 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6712 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6713 .   a - matrix values
6714 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6715 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6716 -   oa - matrix values
6717 
6718    Output Parameter:
6719 .   mat - the matrix
6720 
6721    Level: advanced
6722 
6723    Notes:
6724        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6725        must free the arrays once the matrix has been destroyed and not before.
6726 
6727        The i and j indices are 0 based
6728 
6729        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6730 
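       For illustration only, consider a hypothetical 4 x 4 matrix on two processes, each owning two rows and two
       columns; if rank 0 stores A(0,0)=1, A(0,3)=2, A(1,1)=3, A(1,2)=4, then on rank 0
.vb
       diagonal part     (local column indices):  i  = {0,1,2},  j  = {0,1},  a  = {1,3}
       off-diagonal part (global column indices): oi = {0,1,2},  oj = {3,2},  oa = {2,4}
.ve
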
6731        This sets local rows and cannot be used to set off-processor values.
6732 
6733        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6734        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6735        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6736        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6737        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6738        communication if it is known that only local entries will be set.
6739 
6740 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6741           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6742 @*/
6743 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6744 {
6745   Mat_MPIAIJ     *maij;
6746 
6747   PetscFunctionBegin;
6748   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6749   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6750   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6751   PetscCall(MatCreate(comm,mat));
6752   PetscCall(MatSetSizes(*mat,m,n,M,N));
6753   PetscCall(MatSetType(*mat,MATMPIAIJ));
6754   maij = (Mat_MPIAIJ*) (*mat)->data;
6755 
6756   (*mat)->preallocated = PETSC_TRUE;
6757 
6758   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6759   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6760 
6761   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6762   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6763 
6764   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6765   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6766   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6767   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6768   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6769   PetscFunctionReturn(0);
6770 }
6771 
6772 typedef struct {
6773   Mat       *mp;    /* intermediate products */
6774   PetscBool *mptmp; /* is the intermediate product temporary ? */
6775   PetscInt  cp;     /* number of intermediate products */
6776 
6777   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6778   PetscInt    *startsj_s,*startsj_r;
6779   PetscScalar *bufa;
6780   Mat         P_oth;
6781 
6782   /* may take advantage of merging product->B */
6783   Mat Bloc; /* B-local by merging diag and off-diag */
6784 
6785   /* cusparse does not support splitting the symbolic and numeric phases.
6786      When api_user is true, we don't need to update the numerical values
6787      of the temporary storage */
6788   PetscBool reusesym;
6789 
6790   /* support for COO values insertion */
6791   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars; used as MPI recv/send buffers, respectively */
6792   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6793   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6794   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6795   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6796   PetscMemType mtype;
6797 
6798   /* customization */
6799   PetscBool abmerge;
6800   PetscBool P_oth_bind;
6801 } MatMatMPIAIJBACKEND;
6802 
6803 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6804 {
6805   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6806   PetscInt            i;
6807 
6808   PetscFunctionBegin;
6809   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6810   PetscCall(PetscFree(mmdata->bufa));
6811   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6812   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6813   PetscCall(MatDestroy(&mmdata->P_oth));
6814   PetscCall(MatDestroy(&mmdata->Bloc));
6815   PetscCall(PetscSFDestroy(&mmdata->sf));
6816   for (i = 0; i < mmdata->cp; i++) {
6817     PetscCall(MatDestroy(&mmdata->mp[i]));
6818   }
6819   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6820   PetscCall(PetscFree(mmdata->own[0]));
6821   PetscCall(PetscFree(mmdata->own));
6822   PetscCall(PetscFree(mmdata->off[0]));
6823   PetscCall(PetscFree(mmdata->off));
6824   PetscCall(PetscFree(mmdata));
6825   PetscFunctionReturn(0);
6826 }
6827 
6828 /* Copy selected n entries with indices in idx[] of A to v[].
6829    If idx is NULL, copy the whole data array of A to v[]
6830  */
6831 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6832 {
6833   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6834 
6835   PetscFunctionBegin;
6836   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6837   if (f) {
6838     PetscCall((*f)(A,n,idx,v));
6839   } else {
6840     const PetscScalar *vv;
6841 
6842     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6843     if (n && idx) {
6844       PetscScalar    *w = v;
6845       const PetscInt *oi = idx;
6846       PetscInt       j;
6847 
6848       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6849     } else {
6850       PetscCall(PetscArraycpy(v,vv,n));
6851     }
6852     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6853   }
6854   PetscFunctionReturn(0);
6855 }
6856 
6857 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6858 {
6859   MatMatMPIAIJBACKEND *mmdata;
6860   PetscInt            i,n_d,n_o;
6861 
6862   PetscFunctionBegin;
6863   MatCheckProduct(C,1);
6864   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6865   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6866   if (!mmdata->reusesym) { /* update temporary matrices */
6867     if (mmdata->P_oth) {
6868       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6869     }
6870     if (mmdata->Bloc) {
6871       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6872     }
6873   }
6874   mmdata->reusesym = PETSC_FALSE;
6875 
6876   for (i = 0; i < mmdata->cp; i++) {
6877     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6878     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6879   }
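  /* Copy the numeric values of each non-temporary intermediate product into the COO buffers:
     values destined for rows owned by other ranks go to coo_w (the send buffer), locally owned values go to coo_v */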
6880   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6881     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6882 
6883     if (mmdata->mptmp[i]) continue;
6884     if (noff) {
6885       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6886 
6887       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6888       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6889       n_o += noff;
6890       n_d += nown;
6891     } else {
6892       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6893 
6894       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6895       n_d += mm->nz;
6896     }
6897   }
6898   if (mmdata->hasoffproc) { /* offprocess insertion */
6899     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6900     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6901   }
6902   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6903   PetscFunctionReturn(0);
6904 }
6905 
6906 /* Support for Pt * A, A * P, or Pt * A * P */
6907 #define MAX_NUMBER_INTERMEDIATE 4
6908 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6909 {
6910   Mat_Product            *product = C->product;
6911   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6912   Mat_MPIAIJ             *a,*p;
6913   MatMatMPIAIJBACKEND    *mmdata;
6914   ISLocalToGlobalMapping P_oth_l2g = NULL;
6915   IS                     glob = NULL;
6916   const char             *prefix;
6917   char                   pprefix[256];
6918   const PetscInt         *globidx,*P_oth_idx;
6919   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6920   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6921   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6922                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6923                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6924   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6925 
6926   MatProductType         ptype;
6927   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6928   PetscMPIInt            size;
6929 
6930   PetscFunctionBegin;
6931   MatCheckProduct(C,1);
6932   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6933   ptype = product->type;
6934   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6935     ptype = MATPRODUCT_AB;
6936     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6937   }
6938   switch (ptype) {
6939   case MATPRODUCT_AB:
6940     A = product->A;
6941     P = product->B;
6942     m = A->rmap->n;
6943     n = P->cmap->n;
6944     M = A->rmap->N;
6945     N = P->cmap->N;
6946     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6947     break;
6948   case MATPRODUCT_AtB:
6949     P = product->A;
6950     A = product->B;
6951     m = P->cmap->n;
6952     n = A->cmap->n;
6953     M = P->cmap->N;
6954     N = A->cmap->N;
6955     hasoffproc = PETSC_TRUE;
6956     break;
6957   case MATPRODUCT_PtAP:
6958     A = product->A;
6959     P = product->B;
6960     m = P->cmap->n;
6961     n = P->cmap->n;
6962     M = P->cmap->N;
6963     N = P->cmap->N;
6964     hasoffproc = PETSC_TRUE;
6965     break;
6966   default:
6967     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6968   }
6969   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6970   if (size == 1) hasoffproc = PETSC_FALSE;
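  /* Overall strategy: decompose the requested product into a small set of products of sequential matrices
     (diag/offdiag blocks of A and P, plus gathered copies of remote P rows), then map each intermediate
     product's rows/columns back to global indices of C via rmapt[]/rmapa[] and cmapt[]/cmapa[], and insert
     the values through the COO interface */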
6971 
6972   /* defaults */
6973   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6974     mp[i]    = NULL;
6975     mptmp[i] = PETSC_FALSE;
6976     rmapt[i] = -1;
6977     cmapt[i] = -1;
6978     rmapa[i] = NULL;
6979     cmapa[i] = NULL;
6980   }
6981 
6982   /* customization */
6983   PetscCall(PetscNew(&mmdata));
6984   mmdata->reusesym = product->api_user;
6985   if (ptype == MATPRODUCT_AB) {
6986     if (product->api_user) {
6987       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6988       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6989       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6990       PetscOptionsEnd();
6991     } else {
6992       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6993       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6994       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6995       PetscOptionsEnd();
6996     }
6997   } else if (ptype == MATPRODUCT_PtAP) {
6998     if (product->api_user) {
6999       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7000       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7001       PetscOptionsEnd();
7002     } else {
7003       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7004       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7005       PetscOptionsEnd();
7006     }
7007   }
7008   a = (Mat_MPIAIJ*)A->data;
7009   p = (Mat_MPIAIJ*)P->data;
7010   PetscCall(MatSetSizes(C,m,n,M,N));
7011   PetscCall(PetscLayoutSetUp(C->rmap));
7012   PetscCall(PetscLayoutSetUp(C->cmap));
7013   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7014   PetscCall(MatGetOptionsPrefix(C,&prefix));
7015 
7016   cp   = 0;
7017   switch (ptype) {
7018   case MATPRODUCT_AB: /* A * P */
7019     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7020 
7021     /* A_diag * P_local (merged or not) */
7022     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7023       /* P is product->B */
7024       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7025       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7026       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7027       PetscCall(MatProductSetFill(mp[cp],product->fill));
7028       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7029       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7030       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7031       mp[cp]->product->api_user = product->api_user;
7032       PetscCall(MatProductSetFromOptions(mp[cp]));
7033       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7034       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7035       PetscCall(ISGetIndices(glob,&globidx));
7036       rmapt[cp] = 1;
7037       cmapt[cp] = 2;
7038       cmapa[cp] = globidx;
7039       mptmp[cp] = PETSC_FALSE;
7040       cp++;
7041     } else { /* A_diag * P_diag and A_diag * P_off */
7042       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7043       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7044       PetscCall(MatProductSetFill(mp[cp],product->fill));
7045       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7046       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7047       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7048       mp[cp]->product->api_user = product->api_user;
7049       PetscCall(MatProductSetFromOptions(mp[cp]));
7050       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7051       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7052       rmapt[cp] = 1;
7053       cmapt[cp] = 1;
7054       mptmp[cp] = PETSC_FALSE;
7055       cp++;
7056       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7057       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7058       PetscCall(MatProductSetFill(mp[cp],product->fill));
7059       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7060       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7061       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7062       mp[cp]->product->api_user = product->api_user;
7063       PetscCall(MatProductSetFromOptions(mp[cp]));
7064       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7065       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7066       rmapt[cp] = 1;
7067       cmapt[cp] = 2;
7068       cmapa[cp] = p->garray;
7069       mptmp[cp] = PETSC_FALSE;
7070       cp++;
7071     }
7072 
7073     /* A_off * P_other */
7074     if (mmdata->P_oth) {
7075       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7076       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7077       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7078       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7079       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7080       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7081       PetscCall(MatProductSetFill(mp[cp],product->fill));
7082       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7083       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7084       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7085       mp[cp]->product->api_user = product->api_user;
7086       PetscCall(MatProductSetFromOptions(mp[cp]));
7087       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7088       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7089       rmapt[cp] = 1;
7090       cmapt[cp] = 2;
7091       cmapa[cp] = P_oth_idx;
7092       mptmp[cp] = PETSC_FALSE;
7093       cp++;
7094     }
7095     break;
7096 
7097   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7098     /* A is product->B */
7099     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7100     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7101       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7102       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7103       PetscCall(MatProductSetFill(mp[cp],product->fill));
7104       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7105       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7106       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7107       mp[cp]->product->api_user = product->api_user;
7108       PetscCall(MatProductSetFromOptions(mp[cp]));
7109       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7110       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7111       PetscCall(ISGetIndices(glob,&globidx));
7112       rmapt[cp] = 2;
7113       rmapa[cp] = globidx;
7114       cmapt[cp] = 2;
7115       cmapa[cp] = globidx;
7116       mptmp[cp] = PETSC_FALSE;
7117       cp++;
7118     } else {
7119       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7120       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7121       PetscCall(MatProductSetFill(mp[cp],product->fill));
7122       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7123       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7124       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7125       mp[cp]->product->api_user = product->api_user;
7126       PetscCall(MatProductSetFromOptions(mp[cp]));
7127       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7128       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7129       PetscCall(ISGetIndices(glob,&globidx));
7130       rmapt[cp] = 1;
7131       cmapt[cp] = 2;
7132       cmapa[cp] = globidx;
7133       mptmp[cp] = PETSC_FALSE;
7134       cp++;
7135       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7136       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7137       PetscCall(MatProductSetFill(mp[cp],product->fill));
7138       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7139       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7140       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7141       mp[cp]->product->api_user = product->api_user;
7142       PetscCall(MatProductSetFromOptions(mp[cp]));
7143       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7144       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7145       rmapt[cp] = 2;
7146       rmapa[cp] = p->garray;
7147       cmapt[cp] = 2;
7148       cmapa[cp] = globidx;
7149       mptmp[cp] = PETSC_FALSE;
7150       cp++;
7151     }
7152     break;
7153   case MATPRODUCT_PtAP:
7154     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7155     /* P is product->B */
7156     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7157     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7158     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7159     PetscCall(MatProductSetFill(mp[cp],product->fill));
7160     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7161     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7162     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7163     mp[cp]->product->api_user = product->api_user;
7164     PetscCall(MatProductSetFromOptions(mp[cp]));
7165     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7166     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7167     PetscCall(ISGetIndices(glob,&globidx));
7168     rmapt[cp] = 2;
7169     rmapa[cp] = globidx;
7170     cmapt[cp] = 2;
7171     cmapa[cp] = globidx;
7172     mptmp[cp] = PETSC_FALSE;
7173     cp++;
7174     if (mmdata->P_oth) {
7175       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7176       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7177       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7178       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7179       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7180       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7181       PetscCall(MatProductSetFill(mp[cp],product->fill));
7182       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7183       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7184       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7185       mp[cp]->product->api_user = product->api_user;
7186       PetscCall(MatProductSetFromOptions(mp[cp]));
7187       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7188       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7189       mptmp[cp] = PETSC_TRUE;
7190       cp++;
7191       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7192       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7193       PetscCall(MatProductSetFill(mp[cp],product->fill));
7194       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7195       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7196       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7197       mp[cp]->product->api_user = product->api_user;
7198       PetscCall(MatProductSetFromOptions(mp[cp]));
7199       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7200       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7201       rmapt[cp] = 2;
7202       rmapa[cp] = globidx;
7203       cmapt[cp] = 2;
7204       cmapa[cp] = P_oth_idx;
7205       mptmp[cp] = PETSC_FALSE;
7206       cp++;
7207     }
7208     break;
7209   default:
7210     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7211   }
7212   /* sanity check */
7213   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7214 
7215   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7216   for (i = 0; i < cp; i++) {
7217     mmdata->mp[i]    = mp[i];
7218     mmdata->mptmp[i] = mptmp[i];
7219   }
7220   mmdata->cp = cp;
7221   C->product->data       = mmdata;
7222   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7223   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7224 
7225   /* memory type */
7226   mmdata->mtype = PETSC_MEMTYPE_HOST;
7227   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7228   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7229   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7230   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7231 
7232   /* prepare coo coordinates for values insertion */
7233 
7234   /* count total nonzeros of those intermediate seqaij Mats
7235     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7236     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7237     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7238   */
7239   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7240     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7241     if (mptmp[cp]) continue;
7242     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7243       const PetscInt *rmap = rmapa[cp];
7244       const PetscInt mr = mp[cp]->rmap->n;
7245       const PetscInt rs = C->rmap->rstart;
7246       const PetscInt re = C->rmap->rend;
7247       const PetscInt *ii  = mm->i;
7248       for (i = 0; i < mr; i++) {
7249         const PetscInt gr = rmap[i];
7250         const PetscInt nz = ii[i+1] - ii[i];
7251         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7252         else ncoo_oown += nz; /* this row is local */
7253       }
7254     } else ncoo_d += mm->nz;
7255   }
7256 
7257   /*
7258     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7259 
7260     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7261 
7262     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7263 
7264     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7265     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7266     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7267 
7268     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7269     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7270   */
7271   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7272   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7273 
7274   /* gather (i,j) of nonzeros inserted by remote procs */
7275   if (hasoffproc) {
7276     PetscSF  msf;
7277     PetscInt ncoo2,*coo_i2,*coo_j2;
7278 
7279     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7280     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7281     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7282 
7283     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7284       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7285       PetscInt   *idxoff = mmdata->off[cp];
7286       PetscInt   *idxown = mmdata->own[cp];
7287       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7288         const PetscInt *rmap = rmapa[cp];
7289         const PetscInt *cmap = cmapa[cp];
7290         const PetscInt *ii  = mm->i;
7291         PetscInt       *coi = coo_i + ncoo_o;
7292         PetscInt       *coj = coo_j + ncoo_o;
7293         const PetscInt mr = mp[cp]->rmap->n;
7294         const PetscInt rs = C->rmap->rstart;
7295         const PetscInt re = C->rmap->rend;
7296         const PetscInt cs = C->cmap->rstart;
7297         for (i = 0; i < mr; i++) {
7298           const PetscInt *jj = mm->j + ii[i];
7299           const PetscInt gr  = rmap[i];
7300           const PetscInt nz  = ii[i+1] - ii[i];
7301           if (gr < rs || gr >= re) { /* this is an offproc row */
7302             for (j = ii[i]; j < ii[i+1]; j++) {
7303               *coi++ = gr;
7304               *idxoff++ = j;
7305             }
7306             if (!cmapt[cp]) { /* already global */
7307               for (j = 0; j < nz; j++) *coj++ = jj[j];
7308             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7309               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7310             } else { /* offdiag */
7311               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7312             }
7313             ncoo_o += nz;
7314           } else { /* this is a local row */
7315             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7316           }
7317         }
7318       }
7319       mmdata->off[cp + 1] = idxoff;
7320       mmdata->own[cp + 1] = idxown;
7321     }
7322 
7323     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7324     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7325     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7326     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7327     ncoo = ncoo_d + ncoo_oown + ncoo2;
7328     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7329     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7330     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7331     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7332     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7333     PetscCall(PetscFree2(coo_i,coo_j));
7334     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7335     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7336     coo_i = coo_i2;
7337     coo_j = coo_j2;
7338   } else { /* no offproc values insertion */
7339     ncoo = ncoo_d;
7340     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7341 
7342     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7343     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7344     PetscCall(PetscSFSetUp(mmdata->sf));
7345   }
7346   mmdata->hasoffproc = hasoffproc;
7347 
7348   /* gather (i,j) of nonzeros inserted locally */
7349   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7350     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7351     PetscInt       *coi = coo_i + ncoo_d;
7352     PetscInt       *coj = coo_j + ncoo_d;
7353     const PetscInt *jj  = mm->j;
7354     const PetscInt *ii  = mm->i;
7355     const PetscInt *cmap = cmapa[cp];
7356     const PetscInt *rmap = rmapa[cp];
7357     const PetscInt mr = mp[cp]->rmap->n;
7358     const PetscInt rs = C->rmap->rstart;
7359     const PetscInt re = C->rmap->rend;
7360     const PetscInt cs = C->cmap->rstart;
7361 
7362     if (mptmp[cp]) continue;
7363     if (rmapt[cp] == 1) { /* consecutive rows */
7364       /* fill coo_i */
7365       for (i = 0; i < mr; i++) {
7366         const PetscInt gr = i + rs;
7367         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7368       }
7369       /* fill coo_j */
7370       if (!cmapt[cp]) { /* type-0, already global */
7371         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7372       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7373         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7374       } else { /* type-2, local to global for sparse columns */
7375         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7376       }
7377       ncoo_d += mm->nz;
7378     } else if (rmapt[cp] == 2) { /* sparse rows */
7379       for (i = 0; i < mr; i++) {
7380         const PetscInt *jj = mm->j + ii[i];
7381         const PetscInt gr  = rmap[i];
7382         const PetscInt nz  = ii[i+1] - ii[i];
7383         if (gr >= rs && gr < re) { /* local rows */
7384           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7385           if (!cmapt[cp]) { /* type-0, already global */
7386             for (j = 0; j < nz; j++) *coj++ = jj[j];
7387           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7388             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7389           } else { /* type-2, local to global for sparse columns */
7390             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7391           }
7392           ncoo_d += nz;
7393         }
7394       }
7395     }
7396   }
7397   if (glob) {
7398     PetscCall(ISRestoreIndices(glob,&globidx));
7399   }
7400   PetscCall(ISDestroy(&glob));
7401   if (P_oth_l2g) {
7402     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7403   }
7404   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7405   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7406   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7407 
7408   /* preallocate with COO data */
7409   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7410   PetscCall(PetscFree2(coo_i,coo_j));
7411   PetscFunctionReturn(0);
7412 }
7413 
7414 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7415 {
7416   Mat_Product *product = mat->product;
7417 #if defined(PETSC_HAVE_DEVICE)
7418   PetscBool    match   = PETSC_FALSE;
7419   PetscBool    usecpu  = PETSC_FALSE;
7420 #else
7421   PetscBool    match   = PETSC_TRUE;
7422 #endif
7423 
7424   PetscFunctionBegin;
7425   MatCheckProduct(mat,1);
7426 #if defined(PETSC_HAVE_DEVICE)
7427   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7428     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7429   }
7430   if (match) { /* we can always fall back to the CPU if requested */
7431     switch (product->type) {
7432     case MATPRODUCT_AB:
7433       if (product->api_user) {
7434         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7435         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7436         PetscOptionsEnd();
7437       } else {
7438         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7439         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7440         PetscOptionsEnd();
7441       }
7442       break;
7443     case MATPRODUCT_AtB:
7444       if (product->api_user) {
7445         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7446         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7447         PetscOptionsEnd();
7448       } else {
7449         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7450         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7451         PetscOptionsEnd();
7452       }
7453       break;
7454     case MATPRODUCT_PtAP:
7455       if (product->api_user) {
7456         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7457         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7458         PetscOptionsEnd();
7459       } else {
7460         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7461         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7462         PetscOptionsEnd();
7463       }
7464       break;
7465     default:
7466       break;
7467     }
7468     match = (PetscBool)!usecpu;
7469   }
7470 #endif
7471   if (match) {
7472     switch (product->type) {
7473     case MATPRODUCT_AB:
7474     case MATPRODUCT_AtB:
7475     case MATPRODUCT_PtAP:
7476       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7477       break;
7478     default:
7479       break;
7480     }
7481   }
7482   /* fallback to MPIAIJ ops */
7483   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7484   PetscFunctionReturn(0);
7485 }
7486 
7487 /*
7488     Special version for direct calls from Fortran
7489 */
7490 #include <petsc/private/fortranimpl.h>
7491 
7492 /* Change these macros so they can be used in void functions */
7493 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7494 #undef  PetscCall
7495 #define PetscCall(...) do {                                                                    \
7496     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7497     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7498       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7499       return;                                                                                  \
7500     }                                                                                          \
7501   } while (0)
7502 
7503 #undef SETERRQ
7504 #define SETERRQ(comm,ierr,...) do {                                                            \
7505     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7506     return;                                                                                    \
7507   } while (0)
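
/*
   For exposition only, given the redefinitions above: inside the void Fortran stub below, a statement such as

     PetscCall(MatSeqAIJGetArray(A,&aa));

   expands (roughly) to

     do {
       PetscErrorCode ierr_msv_mpiaij = MatSeqAIJGetArray(A,&aa);
       if (PetscUnlikely(ierr_msv_mpiaij)) {
         *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," ");
         return;
       }
     } while (0);

   so errors are reported through the caller-provided *_ierr argument and the routine returns immediately,
   rather than returning a PetscErrorCode as the standard PetscCall() does.
*/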
7508 
7509 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7510 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7511 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7512 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7513 #else
7514 #endif
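/*
   Fortran-callable entry point: the preprocessor block above maps the symbol to the name-mangling
   convention of the Fortran compiler (all caps, no trailing underscore, or a trailing underscore).
   Following the usual PETSc Fortran binding convention, all arguments are passed by reference and the
   error code is returned through the trailing _ierr argument, which is why the routine is void and
   relies on the PetscCall()/SETERRQ() redefinitions above.
*/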
7515 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7516 {
7517   Mat          mat  = *mmat;
7518   PetscInt     m    = *mm, n = *mn;
7519   InsertMode   addv = *maddv;
7520   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7521   PetscScalar  value;
7522 
7523   MatCheckPreallocated(mat,1);
7524   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7525   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7526   {
7527     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7528     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7529     PetscBool roworiented = aij->roworiented;
7530 
7531     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
7532     Mat        A                    = aij->A;
7533     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7534     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7535     MatScalar  *aa;
7536     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7537     Mat        B                    = aij->B;
7538     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7539     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7540     MatScalar  *ba;
7541     /* The variable below is only needed in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because
7542      * "#if defined" cannot be used inside a macro. */
7543     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7544 
7545     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7546     PetscInt  nonew = a->nonew;
7547     MatScalar *ap1,*ap2;
7548 
7549     PetscFunctionBegin;
7550     PetscCall(MatSeqAIJGetArray(A,&aa));
7551     PetscCall(MatSeqAIJGetArray(B,&ba));
7552     for (i=0; i<m; i++) {
7553       if (im[i] < 0) continue;
7554       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7555       if (im[i] >= rstart && im[i] < rend) {
7556         row      = im[i] - rstart;
7557         lastcol1 = -1;
7558         rp1      = aj + ai[row];
7559         ap1      = aa + ai[row];
7560         rmax1    = aimax[row];
7561         nrow1    = ailen[row];
7562         low1     = 0;
7563         high1    = nrow1;
7564         lastcol2 = -1;
7565         rp2      = bj + bi[row];
7566         ap2      = ba + bi[row];
7567         rmax2    = bimax[row];
7568         nrow2    = bilen[row];
7569         low2     = 0;
7570         high2    = nrow2;
7571 
7572         for (j=0; j<n; j++) {
7573           if (roworiented) value = v[i*n+j];
7574           else value = v[i+j*m];
7575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7576           if (in[j] >= cstart && in[j] < cend) {
7577             col = in[j] - cstart;
7578             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7579           } else if (in[j] < 0) continue;
7580           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7581             /* the extra braces around SETERRQ() are required for --with-errorchecking=0 because of the 'else' clause that follows */
7582             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
7583           } else {
7584             if (mat->was_assembled) {
7585               if (!aij->colmap) {
7586                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
7587               }
7588 #if defined(PETSC_USE_CTABLE)
7589               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
7590               col--;
7591 #else
7592               col = aij->colmap[in[j]] - 1;
7593 #endif
7594               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7595                 PetscCall(MatDisAssemble_MPIAIJ(mat));
7596                 col  =  in[j];
7597                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7598                 B        = aij->B;
7599                 b        = (Mat_SeqAIJ*)B->data;
7600                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7601                 rp2      = bj + bi[row];
7602                 ap2      = ba + bi[row];
7603                 rmax2    = bimax[row];
7604                 nrow2    = bilen[row];
7605                 low2     = 0;
7606                 high2    = nrow2;
7607                 bm       = aij->B->rmap->n;
7608                 ba       = b->a;
7609                 inserted = PETSC_FALSE;
7610               }
7611             } else col = in[j];
7612             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7613           }
7614         }
7615       } else if (!aij->donotstash) {
7616         if (roworiented) {
7617           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7618         } else {
7619           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7620         }
7621       }
7622     }
7623     PetscCall(MatSeqAIJRestoreArray(A,&aa));
7624     PetscCall(MatSeqAIJRestoreArray(B,&ba));
7625   }
7626   PetscFunctionReturnVoid();
7627 }
7628 /* Undefine these macros here because they were redefined above. No other PETSc
7629  * functions should be defined past this point, since the original definitions
7630  * cannot be recovered. */
7631 #undef PetscCall
7632 #undef SETERRQ
7633