xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6a3d25952bcc09d31b72985d60f0bc62c77fff78)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
10 {
11   Mat            B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
15   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
16   PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
17   PetscFunctionReturn(0);
18 }
19 
20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
21 {
22   Mat            B;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
26   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
27   PetscCall(MatDestroy(&B));
28   PetscFunctionReturn(0);
29 }
30 
31 /*MC
32    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
35    and MATMPIAIJ otherwise.  As a result, for single process communicators,
36   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity (see the usage sketch following this manual page).
39 
40    Options Database Keys:
41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
42 
43   Developer Notes:
44     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to using inodes when
45    enough of them exist.
46 
47   Level: beginner
48 
49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
50 M*/
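/*
  A minimal usage sketch for MATAIJ (illustrative only, not part of the original source;
  the local sizes m,n and the preallocation values are assumptions).  Calling both
  preallocation routines, as recommended above, keeps the code correct on any number of
  MPI processes:

    Mat A;
    MatCreate(comm, &A);
    MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
    MatSetFromOptions(A);                             // -mat_type aij selects this (default) type
    MatSeqAIJSetPreallocation(A, 5, NULL);            // used when the communicator has one process
    MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);   // used when the communicator has multiple processes
*/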
51 
52 /*MC
53    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
54 
55    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
56    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
57    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
58   for communicators controlling multiple processes.  It is recommended that you call both of
59   the above preallocation routines for simplicity (see the usage sketch following this manual page).
60 
61    Options Database Keys:
62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
63 
64   Level: beginner
65 
66 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
67 M*/
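/*
  A minimal sketch of selecting the CRL variant at runtime (illustrative only, not part
  of the original source; sizes and preallocation values are assumptions):

    Mat A;
    MatCreate(comm, &A);
    MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
    MatSetFromOptions(A);                             // run with -mat_type aijcrl
    MatSeqAIJSetPreallocation(A, 5, NULL);            // preallocation is shared with MATAIJ
    MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);
*/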
68 
69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
70 {
71   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
72 
73   PetscFunctionBegin;
74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
75   A->boundtocpu = flg;
76 #endif
77   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
78   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
79 
80   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
81    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
82    * to differ from the parent matrix. */
83   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
84   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
85 
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
90 {
91   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
92 
93   PetscFunctionBegin;
94   if (mat->A) {
95     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
96     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
102 {
103   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
104   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
105   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
106   const PetscInt  *ia,*ib;
107   const MatScalar *aa,*bb,*aav,*bav;
108   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
109   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
110 
111   PetscFunctionBegin;
112   *keptrows = NULL;
113 
114   ia   = a->i;
115   ib   = b->i;
116   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
117   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
118   for (i=0; i<m; i++) {
119     na = ia[i+1] - ia[i];
120     nb = ib[i+1] - ib[i];
121     if (!na && !nb) {
122       cnt++;
123       goto ok1;
124     }
125     aa = aav + ia[i];
126     for (j=0; j<na; j++) {
127       if (aa[j] != 0.0) goto ok1;
128     }
129     bb = bav + ib[i];
130     for (j=0; j <nb; j++) {
131       if (bb[j] != 0.0) goto ok1;
132     }
133     cnt++;
134 ok1:;
135   }
136   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
137   if (!n0rows) {
138     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
140     PetscFunctionReturn(0);
141   }
142   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
143   cnt  = 0;
144   for (i=0; i<m; i++) {
145     na = ia[i+1] - ia[i];
146     nb = ib[i+1] - ib[i];
147     if (!na && !nb) continue;
148     aa = aav + ia[i];
149     for (j=0; j<na;j++) {
150       if (aa[j] != 0.0) {
151         rows[cnt++] = rstart + i;
152         goto ok2;
153       }
154     }
155     bb = bav + ib[i];
156     for (j=0; j<nb; j++) {
157       if (bb[j] != 0.0) {
158         rows[cnt++] = rstart + i;
159         goto ok2;
160       }
161     }
162 ok2:;
163   }
164   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
165   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
167   PetscFunctionReturn(0);
168 }
169 
170 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
171 {
172   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
173   PetscBool         cong;
174 
175   PetscFunctionBegin;
176   PetscCall(MatHasCongruentLayouts(Y,&cong));
177   if (Y->assembled && cong) {
178     PetscCall(MatDiagonalSet(aij->A,D,is));
179   } else {
180     PetscCall(MatDiagonalSet_Default(Y,D,is));
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
188   PetscInt       i,rstart,nrows,*rows;
189 
190   PetscFunctionBegin;
191   *zrows = NULL;
192   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
193   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
194   for (i=0; i<nrows; i++) rows[i] += rstart;
195   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
200 {
201   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
202   PetscInt          i,m,n,*garray = aij->garray;
203   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
204   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
205   PetscReal         *work;
206   const PetscScalar *dummy;
207 
208   PetscFunctionBegin;
209   PetscCall(MatGetSize(A,&m,&n));
210   PetscCall(PetscCalloc1(n,&work));
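  /* The get/restore pairs below do not use 'dummy'; presumably they force any device-resident
     values to be synchronized to the host before a_aij->a and b_aij->a are read
     (explanatory comment added here, not in the original source) */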
211   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
212   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
213   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
214   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
215   if (type == NORM_2) {
216     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
217       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
218     }
219     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
220       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
221     }
222   } else if (type == NORM_1) {
223     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
224       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
225     }
226     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
227       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
228     }
229   } else if (type == NORM_INFINITY) {
230     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
231       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
232     }
233     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
234       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
235     }
236   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
237     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
238       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
239     }
240     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
241       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
242     }
243   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
244     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
245       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
246     }
247     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
248       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
249     }
250   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
251   if (type == NORM_INFINITY) {
252     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
253   } else {
254     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
255   }
256   PetscCall(PetscFree(work));
257   if (type == NORM_2) {
258     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
259   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
260     for (i=0; i<n; i++) reductions[i] /= m;
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
266 {
267   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
268   IS              sis,gis;
269   const PetscInt  *isis,*igis;
270   PetscInt        n,*iis,nsis,ngis,rstart,i;
271 
272   PetscFunctionBegin;
273   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
274   PetscCall(MatFindNonzeroRows(a->B,&gis));
275   PetscCall(ISGetSize(gis,&ngis));
276   PetscCall(ISGetSize(sis,&nsis));
277   PetscCall(ISGetIndices(sis,&isis));
278   PetscCall(ISGetIndices(gis,&igis));
279 
280   PetscCall(PetscMalloc1(ngis+nsis,&iis));
281   PetscCall(PetscArraycpy(iis,igis,ngis));
282   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
283   n    = ngis + nsis;
284   PetscCall(PetscSortRemoveDupsInt(&n,iis));
285   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
286   for (i=0; i<n; i++) iis[i] += rstart;
287   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
288 
289   PetscCall(ISRestoreIndices(sis,&isis));
290   PetscCall(ISRestoreIndices(gis,&igis));
291   PetscCall(ISDestroy(&sis));
292   PetscCall(ISDestroy(&gis));
293   PetscFunctionReturn(0);
294 }
295 
296 /*
297   Local utility routine that creates a mapping from the global column
298 number to the local number in the off-diagonal part of the local
299 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
300 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
301 stores an order-N integer array), though access is fast.  A lookup sketch follows this routine.
302 */
303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscInt       n = aij->B->cmap->n,i;
307 
308   PetscFunctionBegin;
309   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
310 #if defined(PETSC_USE_CTABLE)
311   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
312   for (i=0; i<n; i++) {
313     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
314   }
315 #else
316   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
317   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
318   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
319 #endif
320   PetscFunctionReturn(0);
321 }
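/*
  A lookup sketch for the colmap built above (illustrative only; gcol and lcol are
  hypothetical names, not variables from this file).  Both storage variants store
  "local index + 1", so a result of 0 (i.e. -1 after the decrement) means the global
  column is not present in the off-diagonal block:

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    PetscTableFind(aij->colmap, gcol + 1, &lcol);
    lcol--;                          // lcol < 0: gcol is not a column of B
  #else
    lcol = aij->colmap[gcol] - 1;    // same convention for the dense array variant
  #endif
*/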
322 
323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
324 { \
325     if (col <= lastcol1)  low1 = 0;     \
326     else                 high1 = nrow1; \
327     lastcol1 = col;\
328     while (high1-low1 > 5) { \
329       t = (low1+high1)/2; \
330       if (rp1[t] > col) high1 = t; \
331       else              low1  = t; \
332     } \
333       for (_i=low1; _i<high1; _i++) { \
334         if (rp1[_i] > col) break; \
335         if (rp1[_i] == col) { \
336           if (addv == ADD_VALUES) { \
337             ap1[_i] += value;   \
338             /* Not sure whether the PetscLogFlops() call slows down the code or not */ \
339             (void)PetscLogFlops(1.0);   \
340            } \
341           else                    ap1[_i] = value; \
342           goto a_noinsert; \
343         } \
344       }  \
345       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
346       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
347       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
348       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
349       N = nrow1++ - 1; a->nz++; high1++; \
350       /* shift up all the later entries in this row */ \
351       PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
352       PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
353       rp1[_i] = col;  \
354       ap1[_i] = value;  \
355       A->nonzerostate++;\
356       a_noinsert: ; \
357       ailen[row] = nrow1; \
358 }
359 
360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
361   { \
362     if (col <= lastcol2) low2 = 0;                        \
363     else high2 = nrow2;                                   \
364     lastcol2 = col;                                       \
365     while (high2-low2 > 5) {                              \
366       t = (low2+high2)/2;                                 \
367       if (rp2[t] > col) high2 = t;                        \
368       else             low2  = t;                         \
369     }                                                     \
370     for (_i=low2; _i<high2; _i++) {                       \
371       if (rp2[_i] > col) break;                           \
372       if (rp2[_i] == col) {                               \
373         if (addv == ADD_VALUES) {                         \
374           ap2[_i] += value;                               \
375           (void)PetscLogFlops(1.0);                       \
376         }                                                 \
377         else                    ap2[_i] = value;          \
378         goto b_noinsert;                                  \
379       }                                                   \
380     }                                                     \
381     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
382     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
383     PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
384     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
385     N = nrow2++ - 1; b->nz++; high2++;                    \
386     /* shift up all the later entries in this row */      \
387     PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
388     PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
389     rp2[_i] = col;                                        \
390     ap2[_i] = value;                                      \
391     B->nonzerostate++;                                    \
392     b_noinsert: ;                                         \
393     bilen[row] = nrow2;                                   \
394   }
395 
396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
397 {
398   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
399   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
400   PetscInt       l,*garray = mat->garray,diag;
401   PetscScalar    *aa,*ba;
402 
403   PetscFunctionBegin;
404   /* code only works for square matrices A */
405 
406   /* find size of row to the left of the diagonal part */
407   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
408   row  = row - diag;
409   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
410     if (garray[b->j[b->i[row]+l]] > diag) break;
411   }
412   if (l) {
413     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
414     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
415     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
416   }
417 
418   /* diagonal part */
419   if (a->i[row+1]-a->i[row]) {
420     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
421     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
422     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
423   }
424 
425   /* right of diagonal part */
426   if (b->i[row+1]-b->i[row]-l) {
427     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
428     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
429     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
430   }
431   PetscFunctionReturn(0);
432 }
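/*
  Layout of v[] expected by MatSetValuesRow_MPIAIJ() above (a clarifying note, not part of
  the original source): the caller supplies the values of every stored nonzero of the global
  row in ascending global-column order,

    v = [ off-diagonal entries left of the diagonal block | diagonal-block entries | remaining off-diagonal entries ]

  which is why the copies go to B, then A, then B again.
*/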
433 
434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
435 {
436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
437   PetscScalar    value = 0.0;
438   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
439   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
440   PetscBool      roworiented = aij->roworiented;
441 
442   /* Some Variables required in the macro */
443   Mat        A                    = aij->A;
444   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
445   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
446   PetscBool  ignorezeroentries    = a->ignorezeroentries;
447   Mat        B                    = aij->B;
448   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
449   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
450   MatScalar  *aa,*ba;
451   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
452   PetscInt   nonew;
453   MatScalar  *ap1,*ap2;
454 
455   PetscFunctionBegin;
456   PetscCall(MatSeqAIJGetArray(A,&aa));
457   PetscCall(MatSeqAIJGetArray(B,&ba));
458   for (i=0; i<m; i++) {
459     if (im[i] < 0) continue;
460     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
461     if (im[i] >= rstart && im[i] < rend) {
462       row      = im[i] - rstart;
463       lastcol1 = -1;
464       rp1      = aj + ai[row];
465       ap1      = aa + ai[row];
466       rmax1    = aimax[row];
467       nrow1    = ailen[row];
468       low1     = 0;
469       high1    = nrow1;
470       lastcol2 = -1;
471       rp2      = bj + bi[row];
472       ap2      = ba + bi[row];
473       rmax2    = bimax[row];
474       nrow2    = bilen[row];
475       low2     = 0;
476       high2    = nrow2;
477 
478       for (j=0; j<n; j++) {
479         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
480         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
481         if (in[j] >= cstart && in[j] < cend) {
482           col   = in[j] - cstart;
483           nonew = a->nonew;
484           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
485         } else if (in[j] < 0) {
486           continue;
487         } else {
488           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
489           if (mat->was_assembled) {
490             if (!aij->colmap) {
491               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
492             }
493 #if defined(PETSC_USE_CTABLE)
494             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
495             col--;
496 #else
497             col = aij->colmap[in[j]] - 1;
498 #endif
499             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
500               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
501               col  =  in[j];
502               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
503               B        = aij->B;
504               b        = (Mat_SeqAIJ*)B->data;
505               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
506               rp2      = bj + bi[row];
507               ap2      = ba + bi[row];
508               rmax2    = bimax[row];
509               nrow2    = bilen[row];
510               low2     = 0;
511               high2    = nrow2;
512               bm       = aij->B->rmap->n;
513               ba       = b->a;
514             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
515               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
516                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
517               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
518             }
519           } else col = in[j];
520           nonew = b->nonew;
521           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
522         }
523       }
524     } else {
525       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
526       if (!aij->donotstash) {
527         mat->assembled = PETSC_FALSE;
528         if (roworiented) {
529           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
530         } else {
531           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
532         }
533       }
534     }
535   }
536   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
537   PetscCall(MatSeqAIJRestoreArray(B,&ba));
538   PetscFunctionReturn(0);
539 }
540 
541 /*
542     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
543     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
544     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
545 */
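/*
  A worked example of the splitting performed below (values assumed for illustration):
  with locally owned columns [cstart,cend) = [0,2) and local CSR input

    mat_i = {0, 3, 5}
    mat_j = {0, 1, 3,  1, 2}

  row 0 contributes columns 0 and 1 to the diagonal block and column 3 to the off-diagonal
  block, while row 1 contributes column 1 and column 2 respectively, giving ailen = {2, 1}
  and bilen = {1, 1}; bj keeps the global column numbers.
*/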
546 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
547 {
548   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
549   Mat            A           = aij->A; /* diagonal part of the matrix */
550   Mat            B           = aij->B; /* offdiagonal part of the matrix */
551   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
552   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
554   PetscInt       *ailen      = a->ilen,*aj = a->j;
555   PetscInt       *bilen      = b->ilen,*bj = b->j;
556   PetscInt       am          = aij->A->rmap->n,j;
557   PetscInt       diag_so_far = 0,dnz;
558   PetscInt       offd_so_far = 0,onz;
559 
560   PetscFunctionBegin;
561   /* Iterate over all rows of the matrix */
562   for (j=0; j<am; j++) {
563     dnz = onz = 0;
564     /*  Iterate over all non-zero columns of the current row */
565     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
566       /* If column is in the diagonal */
567       if (mat_j[col] >= cstart && mat_j[col] < cend) {
568         aj[diag_so_far++] = mat_j[col] - cstart;
569         dnz++;
570       } else { /* off-diagonal entries */
571         bj[offd_so_far++] = mat_j[col];
572         onz++;
573       }
574     }
575     ailen[j] = dnz;
576     bilen[j] = onz;
577   }
578   PetscFunctionReturn(0);
579 }
580 
581 /*
582     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
583     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
584     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
585     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
586     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
587 */
588 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
589 {
590   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
591   Mat            A      = aij->A; /* diagonal part of the matrix */
592   Mat            B      = aij->B; /* offdiagonal part of the matrix */
593   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
594   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
595   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
597   PetscInt       *ailen = a->ilen,*aj = a->j;
598   PetscInt       *bilen = b->ilen,*bj = b->j;
599   PetscInt       am     = aij->A->rmap->n,j;
600   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
601   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
602   PetscScalar    *aa = a->a,*ba = b->a;
603 
604   PetscFunctionBegin;
605   /* Iterate over all rows of the matrix */
606   for (j=0; j<am; j++) {
607     dnz_row = onz_row = 0;
608     rowstart_offd = full_offd_i[j];
609     rowstart_diag = full_diag_i[j];
610     /*  Iterate over all non-zero columns of the current row */
611     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
612       /* If column is in the diagonal */
613       if (mat_j[col] >= cstart && mat_j[col] < cend) {
614         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
615         aa[rowstart_diag+dnz_row] = mat_a[col];
616         dnz_row++;
617       } else { /* off-diagonal entries */
618         bj[rowstart_offd+onz_row] = mat_j[col];
619         ba[rowstart_offd+onz_row] = mat_a[col];
620         onz_row++;
621       }
622     }
623     ailen[j] = dnz_row;
624     bilen[j] = onz_row;
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* negative row */
638     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* negative column */
643         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
647         } else {
648           if (!aij->colmap) {
649             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
650           }
651 #if defined(PETSC_USE_CTABLE)
652           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   PetscInt       nstash,reallocs;
672 
673   PetscFunctionBegin;
674   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
677   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
678   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
679   PetscFunctionReturn(0);
680 }
681 
682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscMPIInt    n;
686   PetscInt       i,j,rstart,ncols,flg;
687   PetscInt       *row,*col;
688   PetscBool      other_disassembled;
689   PetscScalar    *val;
690 
691   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
692 
693   PetscFunctionBegin;
694   if (!aij->donotstash && !mat->nooffprocentries) {
695     while (1) {
696       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
697       if (!flg) break;
698 
699       for (i=0; i<n;) {
700         /* Now identify the consecutive vals belonging to the same row */
701         for (j=i,rstart=row[j]; j<n; j++) {
702           if (row[j] != rstart) break;
703         }
704         if (j < n) ncols = j-i;
705         else       ncols = n-i;
706         /* Now assemble all these values with a single function call */
707         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
708         i    = j;
709       }
710     }
711     PetscCall(MatStashScatterEnd_Private(&mat->stash));
712   }
713 #if defined(PETSC_HAVE_DEVICE)
714   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
715   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
716   if (mat->boundtocpu) {
717     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
718     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
719   }
720 #endif
721   PetscCall(MatAssemblyBegin(aij->A,mode));
722   PetscCall(MatAssemblyEnd(aij->A,mode));
723 
724   /* determine if any process has disassembled; if so, we must
725      also disassemble ourselves so that we may reassemble. */
726   /*
727      if the nonzero structure of the submatrix B cannot change then we know that
728      no process disassembled, and thus we can skip this step
729   */
730   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
731     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
732     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
733       PetscCall(MatDisAssemble_MPIAIJ(mat));
734     }
735   }
736   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
737     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
738   }
739   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
740 #if defined(PETSC_HAVE_DEVICE)
741   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
742 #endif
743   PetscCall(MatAssemblyBegin(aij->B,mode));
744   PetscCall(MatAssemblyEnd(aij->B,mode));
745 
746   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
747 
748   aij->rowvalues = NULL;
749 
750   PetscCall(VecDestroy(&aij->diag));
751 
752   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
753   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
754     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
755     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
756   }
757 #if defined(PETSC_HAVE_DEVICE)
758   mat->offloadmask = PETSC_OFFLOAD_BOTH;
759 #endif
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
764 {
765   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
766 
767   PetscFunctionBegin;
768   PetscCall(MatZeroEntries(l->A));
769   PetscCall(MatZeroEntries(l->B));
770   PetscFunctionReturn(0);
771 }
772 
773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
774 {
775   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
776   PetscObjectState sA, sB;
777   PetscInt        *lrows;
778   PetscInt         r, len;
779   PetscBool        cong, lch, gch;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
784   PetscCall(MatHasCongruentLayouts(A,&cong));
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     PetscCall(VecGetArrayRead(x, &xx));
792     PetscCall(VecGetArray(b, &bb));
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     PetscCall(VecRestoreArrayRead(x, &xx));
795     PetscCall(VecRestoreArray(b, &bb));
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
803     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
804   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
824     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
834     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
835   }
836   PetscCall(PetscFree(lrows));
837   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
838   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscMPIInt       n = A->rmap->n;
851   PetscInt          i,j,r,m,len = 0;
852   PetscInt          *lrows,*owners = A->rmap->range;
853   PetscMPIInt       p = 0;
854   PetscSFNode       *rrows;
855   PetscSF           sf;
856   const PetscScalar *xx;
857   PetscScalar       *bb,*mask,*aij_a;
858   Vec               xmask,lmask;
859   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
860   const PetscInt    *aj, *ii,*ridx;
861   PetscScalar       *aa;
862 
863   PetscFunctionBegin;
864   /* Create SF where leaves are input rows and roots are owned rows */
865   PetscCall(PetscMalloc1(n, &lrows));
866   for (r = 0; r < n; ++r) lrows[r] = -1;
867   PetscCall(PetscMalloc1(N, &rrows));
868   for (r = 0; r < N; ++r) {
869     const PetscInt idx   = rows[r];
870     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
871     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
872       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
873     }
874     rrows[r].rank  = p;
875     rrows[r].index = rows[r] - owners[p];
876   }
877   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
878   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
879   /* Collect flags for rows to be zeroed */
880   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
881   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
882   PetscCall(PetscSFDestroy(&sf));
883   /* Compress and put in row numbers */
884   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
885   /* zero diagonal part of matrix */
886   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
887   /* handle off diagonal part of matrix */
888   PetscCall(MatCreateVecs(A,&xmask,NULL));
889   PetscCall(VecDuplicate(l->lvec,&lmask));
890   PetscCall(VecGetArray(xmask,&bb));
891   for (i=0; i<len; i++) bb[lrows[i]] = 1;
892   PetscCall(VecRestoreArray(xmask,&bb));
893   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
894   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
895   PetscCall(VecDestroy(&xmask));
896   if (x && b) { /* this code is buggy when the row and column layout don't match */
897     PetscBool cong;
898 
899     PetscCall(MatHasCongruentLayouts(A,&cong));
900     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
901     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
902     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
903     PetscCall(VecGetArrayRead(l->lvec,&xx));
904     PetscCall(VecGetArray(b,&bb));
905   }
906   PetscCall(VecGetArray(lmask,&mask));
907   /* remove zeroed rows of off diagonal matrix */
908   PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
909   ii = aij->i;
910   for (i=0; i<len; i++) {
911     PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
912   }
913   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
914   if (aij->compressedrow.use) {
915     m    = aij->compressedrow.nrows;
916     ii   = aij->compressedrow.i;
917     ridx = aij->compressedrow.rindex;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij_a + ii[i];
922 
923       for (j=0; j<n; j++) {
924         if (PetscAbsScalar(mask[*aj])) {
925           if (b) bb[*ridx] -= *aa*xx[*aj];
926           *aa = 0.0;
927         }
928         aa++;
929         aj++;
930       }
931       ridx++;
932     }
933   } else { /* do not use compressed row format */
934     m = l->B->rmap->n;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij_a + ii[i];
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[i] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947     }
948   }
949   if (x && b) {
950     PetscCall(VecRestoreArray(b,&bb));
951     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
952   }
953   PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
954   PetscCall(VecRestoreArray(lmask,&mask));
955   PetscCall(VecDestroy(&lmask));
956   PetscCall(PetscFree(lrows));
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscInt       nt;
970   VecScatter     Mvctx = a->Mvctx;
971 
972   PetscFunctionBegin;
973   PetscCall(VecGetLocalSize(xx,&nt));
974   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
975   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
976   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
977   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
978   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985 
986   PetscFunctionBegin;
987   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
992 {
993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
994   VecScatter     Mvctx = a->Mvctx;
995 
996   PetscFunctionBegin;
997   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
998   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
999   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1000   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007 
1008   PetscFunctionBegin;
1009   /* do nondiagonal part */
1010   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1011   /* do local part */
1012   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1013   /* add partial results together */
1014   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1015   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1020 {
1021   MPI_Comm       comm;
1022   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1023   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1024   IS             Me,Notme;
1025   PetscInt       M,N,first,last,*notme,i;
1026   PetscBool      lf;
1027   PetscMPIInt    size;
1028 
1029   PetscFunctionBegin;
1030   /* Easy test: symmetric diagonal block */
1031   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1032   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
1033   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1034   if (!*f) PetscFunctionReturn(0);
1035   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1036   PetscCallMPI(MPI_Comm_size(comm,&size));
1037   if (size == 1) PetscFunctionReturn(0);
1038 
1039   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrices() call. */
1040   PetscCall(MatGetSize(Amat,&M,&N));
1041   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
1042   PetscCall(PetscMalloc1(N-last+first,&notme));
1043   for (i=0; i<first; i++) notme[i] = i;
1044   for (i=last; i<M; i++) notme[i-last+first] = i;
1045   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
1046   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
1047   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
1048   Aoff = Aoffs[0];
1049   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
1050   Boff = Boffs[0];
1051   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
1052   PetscCall(MatDestroyMatrices(1,&Aoffs));
1053   PetscCall(MatDestroyMatrices(1,&Boffs));
1054   PetscCall(ISDestroy(&Me));
1055   PetscCall(ISDestroy(&Notme));
1056   PetscCall(PetscFree(notme));
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1061 {
1062   PetscFunctionBegin;
1063   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070 
1071   PetscFunctionBegin;
1072   /* do nondiagonal part */
1073   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1074   /* do local part */
1075   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1076   /* add partial results together */
1077   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1078   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089 
1090   PetscFunctionBegin;
1091   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1092   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1093   PetscCall(MatGetDiagonal(a->A,v));
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100 
1101   PetscFunctionBegin;
1102   PetscCall(MatScale(a->A,aa));
1103   PetscCall(MatScale(a->B,aa));
1104   PetscFunctionReturn(0);
1105 }
1106 /* Free the COO data structures; must match the allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1109 {
1110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1111 
1112   PetscFunctionBegin;
1113   PetscCall(PetscSFDestroy(&aij->coo_sf));
1114   PetscCall(PetscFree(aij->Aperm1));
1115   PetscCall(PetscFree(aij->Bperm1));
1116   PetscCall(PetscFree(aij->Ajmap1));
1117   PetscCall(PetscFree(aij->Bjmap1));
1118 
1119   PetscCall(PetscFree(aij->Aimap2));
1120   PetscCall(PetscFree(aij->Bimap2));
1121   PetscCall(PetscFree(aij->Aperm2));
1122   PetscCall(PetscFree(aij->Bperm2));
1123   PetscCall(PetscFree(aij->Ajmap2));
1124   PetscCall(PetscFree(aij->Bjmap2));
1125 
1126   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1127   PetscCall(PetscFree(aij->Cperm1));
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1132 {
1133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1134 
1135   PetscFunctionBegin;
1136 #if defined(PETSC_USE_LOG)
1137   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1138 #endif
1139   PetscCall(MatStashDestroy_Private(&mat->stash));
1140   PetscCall(VecDestroy(&aij->diag));
1141   PetscCall(MatDestroy(&aij->A));
1142   PetscCall(MatDestroy(&aij->B));
1143 #if defined(PETSC_USE_CTABLE)
1144   PetscCall(PetscTableDestroy(&aij->colmap));
1145 #else
1146   PetscCall(PetscFree(aij->colmap));
1147 #endif
1148   PetscCall(PetscFree(aij->garray));
1149   PetscCall(VecDestroy(&aij->lvec));
1150   PetscCall(VecScatterDestroy(&aij->Mvctx));
1151   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1152   PetscCall(PetscFree(aij->ld));
1153 
1154   /* Free COO */
1155   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1156 
1157   PetscCall(PetscFree(mat->data));
1158 
1159   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1160   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1161 
1162   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1172 #if defined(PETSC_HAVE_CUDA)
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1174 #endif
1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1177 #endif
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1181 #endif
1182 #if defined(PETSC_HAVE_SCALAPACK)
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1184 #endif
1185 #if defined(PETSC_HAVE_HYPRE)
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1188 #endif
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1195 #if defined(PETSC_HAVE_MKL_SPARSE)
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1197 #endif
1198   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1202   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1209   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1210   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa,*ba;
1213   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1214   PetscInt          *rowlens;
1215   PetscInt          *colidxs;
1216   PetscScalar       *matvals;
1217 
1218   PetscFunctionBegin;
1219   PetscCall(PetscViewerSetUp(viewer));
1220 
1221   M  = mat->rmap->N;
1222   N  = mat->cmap->N;
1223   m  = mat->rmap->n;
1224   rs = mat->rmap->rstart;
1225   cs = mat->cmap->rstart;
1226   nz = A->nz + B->nz;
1227 
1228   /* write matrix header */
1229   header[0] = MAT_FILE_CLASSID;
1230   header[1] = M; header[2] = N; header[3] = nz;
1231   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1232   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1233 
1234   /* fill in and store row lengths  */
1235   PetscCall(PetscMalloc1(m,&rowlens));
1236   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1238   PetscCall(PetscFree(rowlens));
1239 
1240   /* fill in and store column indices */
1241   PetscCall(PetscMalloc1(nz,&colidxs));
1242   for (cnt=0, i=0; i<m; i++) {
1243     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       colidxs[cnt++] = garray[B->j[jb]];
1246     }
1247     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1248       colidxs[cnt++] = A->j[ja] + cs;
1249     for (; jb<B->i[i+1]; jb++)
1250       colidxs[cnt++] = garray[B->j[jb]];
1251   }
1252   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1254   PetscCall(PetscFree(colidxs));
1255 
1256   /* fill in and store nonzero values */
1257   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1258   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1259   PetscCall(PetscMalloc1(nz,&matvals));
1260   for (cnt=0, i=0; i<m; i++) {
1261     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1262       if (garray[B->j[jb]] > cs) break;
1263       matvals[cnt++] = ba[jb];
1264     }
1265     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1266       matvals[cnt++] = aa[ja];
1267     for (; jb<B->i[i+1]; jb++)
1268       matvals[cnt++] = ba[jb];
1269   }
1270   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1272   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1273   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1274   PetscCall(PetscFree(matvals));
1275 
1276   /* write block size option to the viewer's .info file */
1277   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #include <petscdraw.h>
1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1283 {
1284   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1285   PetscMPIInt       rank = aij->rank,size = aij->size;
1286   PetscBool         isdraw,iascii,isbinary;
1287   PetscViewer       sviewer;
1288   PetscViewerFormat format;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1292   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1293   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1294   if (iascii) {
1295     PetscCall(PetscViewerGetFormat(viewer,&format));
1296     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1297       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1298       PetscCall(PetscMalloc1(size,&nz));
1299       PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
1300       for (i=0; i<(PetscInt)size; i++) {
1301         nmax = PetscMax(nmax,nz[i]);
1302         nmin = PetscMin(nmin,nz[i]);
1303         navg += nz[i];
1304       }
1305       PetscCall(PetscFree(nz));
1306       navg = navg/size;
1307       PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
1308       PetscFunctionReturn(0);
1309     }
1310     PetscCall(PetscViewerGetFormat(viewer,&format));
1311     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1312       MatInfo   info;
1313       PetscInt *inodes=NULL;
1314 
1315       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
1316       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
1317       PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
1318       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1319       if (!inodes) {
1320         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1321                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1322       } else {
1323         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1324                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1325       }
1326       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
1327       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1328       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
1329       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1330       PetscCall(PetscViewerFlush(viewer));
1331       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1332       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
1333       PetscCall(VecScatterView(aij->Mvctx,viewer));
1334       PetscFunctionReturn(0);
1335     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1336       PetscInt inodecount,inodelimit,*inodes;
1337       PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
1338       if (inodes) {
1339         PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1340       } else {
1341         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1342       }
1343       PetscFunctionReturn(0);
1344     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1345       PetscFunctionReturn(0);
1346     }
1347   } else if (isbinary) {
1348     if (size == 1) {
1349       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1350       PetscCall(MatView(aij->A,viewer));
1351     } else {
1352       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
1353     }
1354     PetscFunctionReturn(0);
1355   } else if (iascii && size == 1) { /* unreachable: the iascii case is fully handled by the first branch above */
1356     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1357     PetscCall(MatView(aij->A,viewer));
1358     PetscFunctionReturn(0);
1359   } else if (isdraw) {
1360     PetscDraw draw;
1361     PetscBool isnull;
1362     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1363     PetscCall(PetscDrawIsNull(draw,&isnull));
1364     if (isnull) PetscFunctionReturn(0);
1365   }
1366 
1367   { /* assemble the entire matrix onto first processor */
1368     Mat A = NULL, Av;
1369     IS  isrow,iscol;
1370 
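    /* rank 0 requests every global row and column (stride index sets of length N), all other ranks
       request none, so MatCreateSubMatrix() gathers a copy of the whole matrix onto rank 0; on rank 0
       the local diagonal block Av is then the full matrix and is what actually gets viewed below. */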
1371     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1372     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1373     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
1374     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
1375 /*  The commented code uses MatCreateSubMatrices instead */
1376 /*
1377     Mat *AA, A = NULL, Av;
1378     IS  isrow,iscol;
1379 
1380     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1381     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1382     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1383     if (rank == 0) {
1384        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1385        A    = AA[0];
1386        Av   = AA[0];
1387     }
1388     PetscCall(MatDestroySubMatrices(1,&AA));
1389 */
1390     PetscCall(ISDestroy(&iscol));
1391     PetscCall(ISDestroy(&isrow));
1392     /*
1393        Everyone has to participate in the viewing call since the graphics waits are
1394        synchronized across all processes that share the PetscDraw object
1395     */
1396     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1397     if (rank == 0) {
1398       if (((PetscObject)mat)->name) {
1399         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
1400       }
1401       PetscCall(MatView_SeqAIJ(Av,sviewer));
1402     }
1403     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1404     PetscCall(PetscViewerFlush(viewer));
1405     PetscCall(MatDestroy(&A));
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1411 {
1412   PetscBool      iascii,isdraw,issocket,isbinary;
1413 
1414   PetscFunctionBegin;
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1419   if (iascii || isdraw || isbinary || issocket) {
1420     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1426 {
1427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1428   Vec            bb1 = NULL;
1429   PetscBool      hasop;
1430 
1431   PetscFunctionBegin;
1432   if (flag == SOR_APPLY_UPPER) {
1433     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1434     PetscFunctionReturn(0);
1435   }
1436 
1437   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1438     PetscCall(VecDuplicate(bb,&bb1));
1439   }
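  /* The "local" sweeps below are a block Jacobi/SOR hybrid: each outer iteration gathers the ghost
     values of xx into lvec, moves the off-diagonal coupling to the right-hand side as
     bb1 = bb - B*lvec, and then applies SOR only to the local diagonal block A. */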
1440 
1441   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1444       its--;
1445     }
1446 
1447     while (its--) {
1448       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1449       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       PetscCall(VecScale(mat->lvec,-1.0));
1453       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1454 
1455       /* local sweep */
1456       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
1457     }
1458   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1461       its--;
1462     }
1463     while (its--) {
1464       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1465       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       PetscCall(VecScale(mat->lvec,-1.0));
1469       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1470 
1471       /* local sweep */
1472       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
1473     }
1474   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1477       its--;
1478     }
1479     while (its--) {
1480       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1481       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1482 
1483       /* update rhs: bb1 = bb - B*x */
1484       PetscCall(VecScale(mat->lvec,-1.0));
1485       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1486 
1487       /* local sweep */
1488       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
1489     }
1490   } else if (flag & SOR_EISENSTAT) {
1491     Vec xx1;
1492 
1493     PetscCall(VecDuplicate(bb,&xx1));
1494     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1495 
1496     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1497     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1498     if (!mat->diag) {
1499       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
1500       PetscCall(MatGetDiagonal(matin,mat->diag));
1501     }
1502     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1503     if (hasop) {
1504       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1505     } else {
1506       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1507     }
1508     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1509 
1510     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1511 
1512     /* local sweep */
1513     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
1514     PetscCall(VecAXPY(xx,1.0,xx1));
1515     PetscCall(VecDestroy(&xx1));
1516   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1517 
1518   PetscCall(VecDestroy(&bb1));
1519 
1520   matin->factorerrortype = mat->A->factorerrortype;
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1525 {
1526   Mat            aA,aB,Aperm;
1527   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1528   PetscScalar    *aa,*ba;
1529   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1530   PetscSF        rowsf,sf;
1531   IS             parcolp = NULL;
1532   PetscBool      done;
1533 
1534   PetscFunctionBegin;
1535   PetscCall(MatGetLocalSize(A,&m,&n));
1536   PetscCall(ISGetIndices(rowp,&rwant));
1537   PetscCall(ISGetIndices(colp,&cwant));
1538   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
1539 
1540   /* Invert row permutation to find out where my rows should go */
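  /* rwant[i] names the original row that should become global row rstart+i of the permuted matrix,
     so the SF has one leaf per destination row pointing at its source row.  Reducing work[i] = rstart+i
     onto the roots therefore stores in rdest[i] the destination (new global) row of original local
     row i, i.e. the inverse permutation restricted to the locally owned rows. */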
1541   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
1542   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
1543   PetscCall(PetscSFSetFromOptions(rowsf));
1544   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1545   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1546   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1547 
1548   /* Invert column permutation to find out where my columns should go */
1549   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1550   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
1551   PetscCall(PetscSFSetFromOptions(sf));
1552   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1553   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1554   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1555   PetscCall(PetscSFDestroy(&sf));
1556 
1557   PetscCall(ISRestoreIndices(rowp,&rwant));
1558   PetscCall(ISRestoreIndices(colp,&cwant));
1559   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
1560 
1561   /* Find out where my gcols should go */
1562   PetscCall(MatGetSize(aB,NULL,&ng));
1563   PetscCall(PetscMalloc1(ng,&gcdest));
1564   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1565   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
1566   PetscCall(PetscSFSetFromOptions(sf));
1567   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&sf));
1570 
1571   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
1572   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1573   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1574   for (i=0; i<m; i++) {
1575     PetscInt    row = rdest[i];
1576     PetscMPIInt rowner;
1577     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
1578     for (j=ai[i]; j<ai[i+1]; j++) {
1579       PetscInt    col = cdest[aj[j]];
1580       PetscMPIInt cowner;
1581       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
1582       if (rowner == cowner) dnnz[i]++;
1583       else onnz[i]++;
1584     }
1585     for (j=bi[i]; j<bi[i+1]; j++) {
1586       PetscInt    col = gcdest[bj[j]];
1587       PetscMPIInt cowner;
1588       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
1589       if (rowner == cowner) dnnz[i]++;
1590       else onnz[i]++;
1591     }
1592   }
1593   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1594   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1595   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1596   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1597   PetscCall(PetscSFDestroy(&rowsf));
1598 
1599   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
1600   PetscCall(MatSeqAIJGetArray(aA,&aa));
1601   PetscCall(MatSeqAIJGetArray(aB,&ba));
1602   for (i=0; i<m; i++) {
1603     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1604     PetscInt j0,rowlen;
1605     rowlen = ai[i+1] - ai[i];
1606     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the size of the work arrays), so insert the values in batches */
1607       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1608       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1609     }
1610     rowlen = bi[i+1] - bi[i];
1611     for (j0=j=0; j<rowlen; j0=j) {
1612       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1613       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1614     }
1615   }
1616   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
1617   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
1618   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1619   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1620   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
1621   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
1622   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
1623   PetscCall(PetscFree3(work,rdest,cdest));
1624   PetscCall(PetscFree(gcdest));
1625   if (parcolp) PetscCall(ISDestroy(&colp));
1626   *B = Aperm;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1631 {
1632   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1633 
1634   PetscFunctionBegin;
1635   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1636   if (ghosts) *ghosts = aij->garray;
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1641 {
1642   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1643   Mat            A    = mat->A,B = mat->B;
1644   PetscLogDouble isend[5],irecv[5];
1645 
1646   PetscFunctionBegin;
1647   info->block_size = 1.0;
1648   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1649 
1650   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1651   isend[3] = info->memory;  isend[4] = info->mallocs;
1652 
1653   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1654 
1655   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;  isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687 {
1688   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1689 
1690   PetscFunctionBegin;
1691   switch (op) {
1692   case MAT_NEW_NONZERO_LOCATIONS:
1693   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695   case MAT_KEEP_NONZERO_PATTERN:
1696   case MAT_NEW_NONZERO_LOCATION_ERR:
1697   case MAT_USE_INODES:
1698   case MAT_IGNORE_ZERO_ENTRIES:
1699   case MAT_FORM_EXPLICIT_TRANSPOSE:
1700     MatCheckPreallocated(A,1);
1701     PetscCall(MatSetOption(a->A,op,flg));
1702     PetscCall(MatSetOption(a->B,op,flg));
1703     break;
1704   case MAT_ROW_ORIENTED:
1705     MatCheckPreallocated(A,1);
1706     a->roworiented = flg;
1707 
1708     PetscCall(MatSetOption(a->A,op,flg));
1709     PetscCall(MatSetOption(a->B,op,flg));
1710     break;
1711   case MAT_FORCE_DIAGONAL_ENTRIES:
1712   case MAT_SORTED_FULL:
1713     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1714     break;
1715   case MAT_IGNORE_OFF_PROC_ENTRIES:
1716     a->donotstash = flg;
1717     break;
1718   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1719   case MAT_SPD:
1720   case MAT_SYMMETRIC:
1721   case MAT_STRUCTURALLY_SYMMETRIC:
1722   case MAT_HERMITIAN:
1723   case MAT_SYMMETRY_ETERNAL:
1724     break;
1725   case MAT_SUBMAT_SINGLEIS:
1726     A->submat_singleis = flg;
1727     break;
1728   case MAT_STRUCTURE_ONLY:
1729     /* The option is handled directly by MatSetOption() */
1730     break;
1731   default:
1732     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1733   }
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1738 {
1739   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1740   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1741   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1742   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1743   PetscInt       *cmap,*idx_p;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1747   mat->getrowactive = PETSC_TRUE;
1748 
1749   if (!mat->rowvalues && (idx || v)) {
1750     /*
1751         allocate enough space to hold information from the longest row.
1752     */
1753     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1754     PetscInt   max = 1,tmp;
1755     for (i=0; i<matin->rmap->n; i++) {
1756       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1757       if (max < tmp) max = tmp;
1758     }
1759     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
1760   }
1761 
1762   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1763   lrow = row - rstart;
1764 
1765   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1766   if (!v)   {pvA = NULL; pvB = NULL;}
1767   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1768   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1769   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1770   nztot = nzA + nzB;
1771 
1772   cmap = mat->garray;
1773   if (v  || idx) {
1774     if (nztot) {
1775       /* Sort by increasing column numbers, assuming A and B already sorted */
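      /* imark records how many off-diagonal (B) columns fall to the left of the diagonal block; the
         merged row is those imark B entries, then the nzA diagonal-block entries, then the remaining
         B entries, so the global column indices stay increasing. */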
1776       PetscInt imark = -1;
1777       if (v) {
1778         *v = v_p = mat->rowvalues;
1779         for (i=0; i<nzB; i++) {
1780           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1781           else break;
1782         }
1783         imark = i;
1784         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1785         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1786       }
1787       if (idx) {
1788         *idx = idx_p = mat->rowindices;
1789         if (imark > -1) {
1790           for (i=0; i<imark; i++) {
1791             idx_p[i] = cmap[cworkB[i]];
1792           }
1793         } else {
1794           for (i=0; i<nzB; i++) {
1795             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1796             else break;
1797           }
1798           imark = i;
1799         }
1800         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1801         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1802       }
1803     } else {
1804       if (idx) *idx = NULL;
1805       if (v)   *v   = NULL;
1806     }
1807   }
1808   *nz  = nztot;
1809   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1810   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1825 {
1826   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1827   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1828   PetscInt        i,j,cstart = mat->cmap->rstart;
1829   PetscReal       sum = 0.0;
1830   const MatScalar *v,*amata,*bmata;
1831 
1832   PetscFunctionBegin;
1833   if (aij->size == 1) {
1834     PetscCall(MatNorm(aij->A,type,norm));
1835   } else {
1836     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
1837     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
1838     if (type == NORM_FROBENIUS) {
1839       v = amata;
1840       for (i=0; i<amat->nz; i++) {
1841         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1842       }
1843       v = bmata;
1844       for (i=0; i<bmat->nz; i++) {
1845         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1846       }
1847       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1848       *norm = PetscSqrtReal(*norm);
1849       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
1850     } else if (type == NORM_1) { /* max column norm */
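      /* ||A||_1 = max_j sum_i |a_ij|: each process adds |a_ij| of its stored entries into tmp[] at the
         global column index (cstart + local column for the diagonal part, garray[] for the off-diagonal
         part), the column sums are completed with an Allreduce, and the maximum over columns is taken. */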
1851       PetscReal *tmp,*tmp2;
1852       PetscInt  *jj,*garray = aij->garray;
1853       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
1854       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
1855       *norm = 0.0;
1856       v     = amata; jj = amat->j;
1857       for (j=0; j<amat->nz; j++) {
1858         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1859       }
1860       v = bmata; jj = bmat->j;
1861       for (j=0; j<bmat->nz; j++) {
1862         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1863       }
1864       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1865       for (j=0; j<mat->cmap->N; j++) {
1866         if (tmp2[j] > *norm) *norm = tmp2[j];
1867       }
1868       PetscCall(PetscFree(tmp));
1869       PetscCall(PetscFree(tmp2));
1870       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1871     } else if (type == NORM_INFINITY) { /* max row norm */
1872       PetscReal ntemp = 0.0;
1873       for (j=0; j<aij->A->rmap->n; j++) {
1874         v   = amata + amat->i[j];
1875         sum = 0.0;
1876         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1877           sum += PetscAbsScalar(*v); v++;
1878         }
1879         v = bmata + bmat->i[j];
1880         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v); v++;
1882         }
1883         if (sum > ntemp) ntemp = sum;
1884       }
1885       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
1886       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1887     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1888     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
1889     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1895 {
1896   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1897   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1898   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1899   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1900   Mat             B,A_diag,*B_diag;
1901   const MatScalar *pbv,*bv;
1902 
1903   PetscFunctionBegin;
1904   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1905   ai = Aloc->i; aj = Aloc->j;
1906   bi = Bloc->i; bj = Bloc->j;
1907   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1908     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1909     PetscSFNode          *oloc;
1910     PETSC_UNUSED PetscSF sf;
1911 
1912     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
1913     /* compute d_nnz for preallocation */
1914     PetscCall(PetscArrayzero(d_nnz,na));
1915     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1916     /* compute local off-diagonal contributions */
1917     PetscCall(PetscArrayzero(g_nnz,nb));
1918     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1919     /* map those to global */
1920     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1921     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
1922     PetscCall(PetscSFSetFromOptions(sf));
1923     PetscCall(PetscArrayzero(o_nnz,na));
1924     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1925     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1926     PetscCall(PetscSFDestroy(&sf));
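    /* g_nnz[k] counts the entries in local column k of the off-diagonal block; the reduce above adds
       those counts into o_nnz on the processes owning the corresponding global columns (a->garray[k]),
       which gives exactly the off-diagonal row lengths needed to preallocate the transpose. */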
1927 
1928     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1929     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1930     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1931     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1932     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
1933     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1934   } else {
1935     B    = *matout;
1936     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1937   }
1938 
1939   b           = (Mat_MPIAIJ*)B->data;
1940   A_diag      = a->A;
1941   B_diag      = &b->A;
1942   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1943   A_diag_ncol = A_diag->cmap->N;
1944   B_diag_ilen = sub_B_diag->ilen;
1945   B_diag_i    = sub_B_diag->i;
1946 
1947   /* Set ilen for diagonal of B */
1948   for (i=0; i<A_diag_ncol; i++) {
1949     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1950   }
1951 
1952   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1953   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1954   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1955 
1956   /* copy over the B part */
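  /* Row i of the off-diagonal block (global row rstart+i of A) becomes column rstart+i of the
     transpose, and its column indices a->garray[bj[...]] become global row indices of the transpose
     that generally belong to other processes, so these entries must go through MatSetValues() and
     the stash instead of being written locally. */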
1957   PetscCall(PetscMalloc1(bi[mb],&cols));
1958   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1959   pbv  = bv;
1960   row  = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
1966     row++;
1967     pbv += ncol; cols_tmp += ncol;
1968   }
1969   PetscCall(PetscFree(cols));
1970   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1971 
1972   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1973   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1974   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1975     *matout = B;
1976   } else {
1977     PetscCall(MatHeaderMerge(A,&B));
1978   }
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1983 {
1984   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1985   Mat            a    = aij->A,b = aij->B;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1990   if (rr) {
1991     PetscCall(VecGetLocalSize(rr,&s1));
1992     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1995   }
1996   if (ll) {
1997     PetscCall(VecGetLocalSize(ll,&s1));
1998     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2000   }
2001   /* scale  the diagonal block */
2002   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2007     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015 
2016   PetscFunctionBegin;
2017   PetscCall(MatSetUnfactored(a->A));
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2022 {
2023   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2024   Mat            a,b,c,d;
2025   PetscBool      flg;
2026 
2027   PetscFunctionBegin;
2028   a = matA->A; b = matA->B;
2029   c = matB->A; d = matB->B;
2030 
2031   PetscCall(MatEqual(a,c,&flg));
2032   if (flg) {
2033     PetscCall(MatEqual(b,d,&flg));
2034   }
2035   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2040 {
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2043 
2044   PetscFunctionBegin;
2045   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2046   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2047     /* because of the column compression in the off-processor part of the matrix a->B,
2048        the number of columns in a->B and b->B may be different, hence we cannot call
2049        the MatCopy() directly on the two parts. If need be, we can provide a more
2050        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2051        then copying the submatrices */
2052     PetscCall(MatCopy_Basic(A,B,str));
2053   } else {
2054     PetscCall(MatCopy(a->A,b->A,str));
2055     PetscCall(MatCopy(a->B,b->B,str));
2056   }
2057   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2062 {
2063   PetscFunctionBegin;
2064   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
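/* The loop below merges, for each row, the two sorted global column lists of X and Y and counts the
   size of their union.  Worked example (indices chosen only for illustration): if a row of X has
   global columns {0,3,5} and the same row of Y has {1,3}, the union is {0,1,3,5}, so nnz for that
   row is 4. */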
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscInt       m = Y->rmap->N;
2097   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2098   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2099 
2100   PetscFunctionBegin;
2101   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2106 {
2107   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2108 
2109   PetscFunctionBegin;
2110   if (str == SAME_NONZERO_PATTERN) {
2111     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2112     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2113   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2114     PetscCall(MatAXPY_Basic(Y,a,X,str));
2115   } else {
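    /* X and Y have unrelated nonzero patterns: preallocate a new matrix B for the union pattern
       (row by row, using the routines above), compute B = a*X + Y into it, and then replace Y's
       internal data with B's via MatHeaderMerge(). */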
2116     Mat      B;
2117     PetscInt *nnz_d,*nnz_o;
2118 
2119     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2120     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2121     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2122     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2123     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2124     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2125     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2126     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2127     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2128     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2129     PetscCall(MatHeaderMerge(Y,&B));
2130     PetscCall(PetscFree(nnz_d));
2131     PetscCall(PetscFree(nnz_o));
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137 
2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139 {
2140   PetscFunctionBegin;
2141   if (PetscDefined(USE_COMPLEX)) {
2142     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2143 
2144     PetscCall(MatConjugate_SeqAIJ(aij->A));
2145     PetscCall(MatConjugate_SeqAIJ(aij->B));
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatRealPart(a->A));
2156   PetscCall(MatRealPart(a->B));
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161 {
2162   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(MatImaginaryPart(a->A));
2166   PetscCall(MatImaginaryPart(a->B));
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2173   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2174   PetscScalar       *va,*vv;
2175   Vec               vB,vA;
2176   const PetscScalar *vb;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2180   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2181 
2182   PetscCall(VecGetArrayWrite(vA,&va));
2183   if (idx) {
2184     for (i=0; i<m; i++) {
2185       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186     }
2187   }
2188 
2189   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2190   PetscCall(PetscMalloc1(m,&idxb));
2191   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2192 
2193   PetscCall(VecGetArrayWrite(v,&vv));
2194   PetscCall(VecGetArrayRead(vB,&vb));
2195   for (i=0; i<m; i++) {
2196     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197       vv[i] = vb[i];
2198       if (idx) idx[i] = a->garray[idxb[i]];
2199     } else {
2200       vv[i] = va[i];
2201       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2202         idx[i] = a->garray[idxb[i]];
2203     }
2204   }
2205   PetscCall(VecRestoreArrayWrite(v,&vv));
2206   PetscCall(VecRestoreArrayWrite(vA,&va));
2207   PetscCall(VecRestoreArrayRead(vB,&vb));
2208   PetscCall(PetscFree(idxb));
2209   PetscCall(VecDestroy(&vA));
2210   PetscCall(VecDestroy(&vB));
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2215 {
2216   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2217   PetscInt          m = A->rmap->n,n = A->cmap->n;
2218   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2219   PetscInt          *cmap  = mat->garray;
2220   PetscInt          *diagIdx, *offdiagIdx;
2221   Vec               diagV, offdiagV;
2222   PetscScalar       *a, *diagA, *offdiagA;
2223   const PetscScalar *ba,*bav;
2224   PetscInt          r,j,col,ncols,*bi,*bj;
2225   Mat               B = mat->B;
2226   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2227 
2228   PetscFunctionBegin;
2229   /* When one process holds the entire A and the other processes have no entries */
2230   if (A->cmap->N == n) {
2231     PetscCall(VecGetArrayWrite(v,&diagA));
2232     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2233     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
2234     PetscCall(VecDestroy(&diagV));
2235     PetscCall(VecRestoreArrayWrite(v,&diagA));
2236     PetscFunctionReturn(0);
2237   } else if (n == 0) {
2238     if (m) {
2239       PetscCall(VecGetArrayWrite(v,&a));
2240       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2241       PetscCall(VecRestoreArrayWrite(v,&a));
2242     }
2243     PetscFunctionReturn(0);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
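  /* Because the off-diagonal block stores only columns that actually have entries, any row with fewer
     than A->cmap->N - n stored entries has at least one implicit 0.0 outside the diagonal block; that
     zero is the candidate minimum (in absolute value) for this part of the row, and offdiagIdx[]
     records the global column of the first such implicit zero. */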
2252   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2253   ba   = bav;
2254   bi   = b->i;
2255   bj   = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r+1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2261     } else { /* Brow is sparse, so there is at least one implicit 0.0 and the minimum absolute value over this part is 0.0 */
2262       offdiagA[r] = 0.0;
2263 
2264       /* Find first hole in the cmap */
2265       for (j=0; j<ncols; j++) {
2266         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2267         if (col > j && j < cstart) {
2268           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2269           break;
2270         } else if (col > j + n && j >= cstart) {
2271           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2272           break;
2273         }
2274       }
2275       if (j == ncols && ncols < A->cmap->N - n) {
2276         /* a hole is outside compressed Bcols */
2277         if (ncols == 0) {
2278           if (cstart) {
2279             offdiagIdx[r] = 0;
2280           } else offdiagIdx[r] = cend;
2281         } else { /* ncols > 0 */
2282           offdiagIdx[r] = cmap[ncols-1] + 1;
2283           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2284         }
2285       }
2286     }
2287 
2288     for (j=0; j<ncols; j++) {
2289       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2290       ba++; bj++;
2291     }
2292   }
2293 
2294   PetscCall(VecGetArrayWrite(v, &a));
2295   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2296   for (r = 0; r < m; ++r) {
2297     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2298       a[r]   = diagA[r];
2299       if (idx) idx[r] = cstart + diagIdx[r];
2300     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2301       a[r] = diagA[r];
2302       if (idx) {
2303         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2304           idx[r] = cstart + diagIdx[r];
2305         } else idx[r] = offdiagIdx[r];
2306       }
2307     } else {
2308       a[r]   = offdiagA[r];
2309       if (idx) idx[r] = offdiagIdx[r];
2310     }
2311   }
2312   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2313   PetscCall(VecRestoreArrayWrite(v, &a));
2314   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2315   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2316   PetscCall(VecDestroy(&diagV));
2317   PetscCall(VecDestroy(&offdiagV));
2318   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2323 {
2324   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2325   PetscInt          m = A->rmap->n,n = A->cmap->n;
2326   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2327   PetscInt          *cmap  = mat->garray;
2328   PetscInt          *diagIdx, *offdiagIdx;
2329   Vec               diagV, offdiagV;
2330   PetscScalar       *a, *diagA, *offdiagA;
2331   const PetscScalar *ba,*bav;
2332   PetscInt          r,j,col,ncols,*bi,*bj;
2333   Mat               B = mat->B;
2334   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2335 
2336   PetscFunctionBegin;
2337   /* When one process holds the entire A and the other processes have no entries */
2338   if (A->cmap->N == n) {
2339     PetscCall(VecGetArrayWrite(v,&diagA));
2340     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2341     PetscCall(MatGetRowMin(mat->A,diagV,idx));
2342     PetscCall(VecDestroy(&diagV));
2343     PetscCall(VecRestoreArrayWrite(v,&diagA));
2344     PetscFunctionReturn(0);
2345   } else if (n == 0) {
2346     if (m) {
2347       PetscCall(VecGetArrayWrite(v,&a));
2348       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2349       PetscCall(VecRestoreArrayWrite(v,&a));
2350     }
2351     PetscFunctionReturn(0);
2352   }
2353 
2354   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
2355   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2356   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2357   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2358 
2359   /* Get offdiagIdx[] for implicit 0.0 */
2360   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2361   ba   = bav;
2362   bi   = b->i;
2363   bj   = b->j;
2364   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2365   for (r = 0; r < m; r++) {
2366     ncols = bi[r+1] - bi[r];
2367     if (ncols == A->cmap->N - n) { /* Brow is dense */
2368       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2369     } else { /* Brow is sparse, so there is at least one implicit 0.0 and the minimum over this part is 0.0 or lower */
2370       offdiagA[r] = 0.0;
2371 
2372       /* Find first hole in the cmap */
2373       for (j=0; j<ncols; j++) {
2374         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2375         if (col > j && j < cstart) {
2376           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2377           break;
2378         } else if (col > j + n && j >= cstart) {
2379           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2380           break;
2381         }
2382       }
2383       if (j == ncols && ncols < A->cmap->N - n) {
2384         /* a hole is outside compressed Bcols */
2385         if (ncols == 0) {
2386           if (cstart) {
2387             offdiagIdx[r] = 0;
2388           } else offdiagIdx[r] = cend;
2389         } else { /* ncols > 0 */
2390           offdiagIdx[r] = cmap[ncols-1] + 1;
2391           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2392         }
2393       }
2394     }
2395 
2396     for (j=0; j<ncols; j++) {
2397       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2398       ba++; bj++;
2399     }
2400   }
2401 
2402   PetscCall(VecGetArrayWrite(v, &a));
2403   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2404   for (r = 0; r < m; ++r) {
2405     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2406       a[r]   = diagA[r];
2407       if (idx) idx[r] = cstart + diagIdx[r];
2408     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2409       a[r] = diagA[r];
2410       if (idx) {
2411         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2412           idx[r] = cstart + diagIdx[r];
2413         } else idx[r] = offdiagIdx[r];
2414       }
2415     } else {
2416       a[r]   = offdiagA[r];
2417       if (idx) idx[r] = offdiagIdx[r];
2418     }
2419   }
2420   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2421   PetscCall(VecRestoreArrayWrite(v, &a));
2422   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2423   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2424   PetscCall(VecDestroy(&diagV));
2425   PetscCall(VecDestroy(&offdiagV));
2426   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2433   PetscInt          m = A->rmap->n,n = A->cmap->n;
2434   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2435   PetscInt          *cmap  = mat->garray;
2436   PetscInt          *diagIdx, *offdiagIdx;
2437   Vec               diagV, offdiagV;
2438   PetscScalar       *a, *diagA, *offdiagA;
2439   const PetscScalar *ba,*bav;
2440   PetscInt          r,j,col,ncols,*bi,*bj;
2441   Mat               B = mat->B;
2442   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2443 
2444   PetscFunctionBegin;
2445   /* When one process holds the entire A and the other processes have no entries */
2446   if (A->cmap->N == n) {
2447     PetscCall(VecGetArrayWrite(v,&diagA));
2448     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2449     PetscCall(MatGetRowMax(mat->A,diagV,idx));
2450     PetscCall(VecDestroy(&diagV));
2451     PetscCall(VecRestoreArrayWrite(v,&diagA));
2452     PetscFunctionReturn(0);
2453   } else if (n == 0) {
2454     if (m) {
2455       PetscCall(VecGetArrayWrite(v,&a));
2456       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2457       PetscCall(VecRestoreArrayWrite(v,&a));
2458     }
2459     PetscFunctionReturn(0);
2460   }
2461 
2462   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2463   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2464   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2465   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2466 
2467   /* Get offdiagIdx[] for implicit 0.0 */
2468   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2469   ba   = bav;
2470   bi   = b->i;
2471   bj   = b->j;
2472   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2473   for (r = 0; r < m; r++) {
2474     ncols = bi[r+1] - bi[r];
2475     if (ncols == A->cmap->N - n) { /* Brow is dense */
2476       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2477     } else { /* Brow is sparse, so the maximum over this part is 0.0 or higher (there is at least one implicit 0.0) */
2478       offdiagA[r] = 0.0;
2479 
2480       /* Find first hole in the cmap */
2481       for (j=0; j<ncols; j++) {
2482         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2483         if (col > j && j < cstart) {
2484           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2485           break;
2486         } else if (col > j + n && j >= cstart) {
2487           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2488           break;
2489         }
2490       }
2491       if (j == ncols && ncols < A->cmap->N - n) {
2492         /* a hole is outside compressed Bcols */
2493         if (ncols == 0) {
2494           if (cstart) {
2495             offdiagIdx[r] = 0;
2496           } else offdiagIdx[r] = cend;
2497         } else { /* ncols > 0 */
2498           offdiagIdx[r] = cmap[ncols-1] + 1;
2499           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2500         }
2501       }
2502     }
2503 
2504     for (j=0; j<ncols; j++) {
2505       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2506       ba++; bj++;
2507     }
2508   }
2509 
2510   PetscCall(VecGetArrayWrite(v,    &a));
2511   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
2512   for (r = 0; r < m; ++r) {
2513     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2514       a[r] = diagA[r];
2515       if (idx) idx[r] = cstart + diagIdx[r];
2516     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2517       a[r] = diagA[r];
2518       if (idx) {
2519         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2520           idx[r] = cstart + diagIdx[r];
2521         } else idx[r] = offdiagIdx[r];
2522       }
2523     } else {
2524       a[r] = offdiagA[r];
2525       if (idx) idx[r] = offdiagIdx[r];
2526     }
2527   }
2528   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2529   PetscCall(VecRestoreArrayWrite(v,       &a));
2530   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
2531   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
2532   PetscCall(VecDestroy(&diagV));
2533   PetscCall(VecDestroy(&offdiagV));
2534   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2539 {
2540   Mat            *dummy;
2541 
2542   PetscFunctionBegin;
2543   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2544   *newmat = *dummy;
2545   PetscCall(PetscFree(dummy));
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2550 {
2551   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2552 
2553   PetscFunctionBegin;
2554   PetscCall(MatInvertBlockDiagonal(a->A,values));
2555   A->factorerrortype = a->A->factorerrortype;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2560 {
2561   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2562 
2563   PetscFunctionBegin;
2564   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2565   PetscCall(MatSetRandom(aij->A,rctx));
2566   if (x->assembled) {
2567     PetscCall(MatSetRandom(aij->B,rctx));
2568   } else {
2569     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2570   }
2571   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2572   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2577 {
2578   PetscFunctionBegin;
2579   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2580   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*@
2585    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2586 
2587    Collective on Mat
2588 
2589    Input Parameters:
2590 +    A - the matrix
2591 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2592 
2593    Level: advanced
2594 
2595 @*/
2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2597 {
2598   PetscFunctionBegin;
2599   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2600   PetscFunctionReturn(0);
2601 }
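/* A minimal usage sketch (illustrative only, not part of the original source); A is assumed to be an
   assembled MATMPIAIJ matrix and nis/islist index sets supplied by the caller:

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE));
     PetscCall(MatIncreaseOverlap(A,nis,islist,1));

   The same choice can be made from the options database with -mat_increase_overlap_scalable, which is
   processed by MatSetFromOptions_MPIAIJ() below. */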
2602 
2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2604 {
2605   PetscBool            sc = PETSC_FALSE,flg;
2606 
2607   PetscFunctionBegin;
2608   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2609   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2610   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2611   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2612   PetscOptionsHeadEnd();
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2617 {
2618   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2619   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2620 
2621   PetscFunctionBegin;
2622   if (!Y->preallocated) {
2623     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2624   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2625     PetscInt nonew = aij->nonew;
2626     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2627     aij->nonew = nonew;
2628   }
2629   PetscCall(MatShift_Basic(Y,a));
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2634 {
2635   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2636 
2637   PetscFunctionBegin;
2638   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2639   PetscCall(MatMissingDiagonal(a->A,missing,d));
2640   if (d) {
2641     PetscInt rstart;
2642     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2643     *d += rstart;
2644 
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2650 {
2651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2652 
2653   PetscFunctionBegin;
2654   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 /* -------------------------------------------------------------------*/
2659 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2660                                        MatGetRow_MPIAIJ,
2661                                        MatRestoreRow_MPIAIJ,
2662                                        MatMult_MPIAIJ,
2663                                 /* 4*/ MatMultAdd_MPIAIJ,
2664                                        MatMultTranspose_MPIAIJ,
2665                                        MatMultTransposeAdd_MPIAIJ,
2666                                        NULL,
2667                                        NULL,
2668                                        NULL,
2669                                 /*10*/ NULL,
2670                                        NULL,
2671                                        NULL,
2672                                        MatSOR_MPIAIJ,
2673                                        MatTranspose_MPIAIJ,
2674                                 /*15*/ MatGetInfo_MPIAIJ,
2675                                        MatEqual_MPIAIJ,
2676                                        MatGetDiagonal_MPIAIJ,
2677                                        MatDiagonalScale_MPIAIJ,
2678                                        MatNorm_MPIAIJ,
2679                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2680                                        MatAssemblyEnd_MPIAIJ,
2681                                        MatSetOption_MPIAIJ,
2682                                        MatZeroEntries_MPIAIJ,
2683                                 /*24*/ MatZeroRows_MPIAIJ,
2684                                        NULL,
2685                                        NULL,
2686                                        NULL,
2687                                        NULL,
2688                                 /*29*/ MatSetUp_MPIAIJ,
2689                                        NULL,
2690                                        NULL,
2691                                        MatGetDiagonalBlock_MPIAIJ,
2692                                        NULL,
2693                                 /*34*/ MatDuplicate_MPIAIJ,
2694                                        NULL,
2695                                        NULL,
2696                                        NULL,
2697                                        NULL,
2698                                 /*39*/ MatAXPY_MPIAIJ,
2699                                        MatCreateSubMatrices_MPIAIJ,
2700                                        MatIncreaseOverlap_MPIAIJ,
2701                                        MatGetValues_MPIAIJ,
2702                                        MatCopy_MPIAIJ,
2703                                 /*44*/ MatGetRowMax_MPIAIJ,
2704                                        MatScale_MPIAIJ,
2705                                        MatShift_MPIAIJ,
2706                                        MatDiagonalSet_MPIAIJ,
2707                                        MatZeroRowsColumns_MPIAIJ,
2708                                 /*49*/ MatSetRandom_MPIAIJ,
2709                                        MatGetRowIJ_MPIAIJ,
2710                                        MatRestoreRowIJ_MPIAIJ,
2711                                        NULL,
2712                                        NULL,
2713                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2714                                        NULL,
2715                                        MatSetUnfactored_MPIAIJ,
2716                                        MatPermute_MPIAIJ,
2717                                        NULL,
2718                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2719                                        MatDestroy_MPIAIJ,
2720                                        MatView_MPIAIJ,
2721                                        NULL,
2722                                        NULL,
2723                                 /*64*/ NULL,
2724                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2729                                        MatGetRowMinAbs_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                 /*75*/ MatFDColoringApply_AIJ,
2735                                        MatSetFromOptions_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                        MatFindZeroDiagonals_MPIAIJ,
2739                                 /*80*/ NULL,
2740                                        NULL,
2741                                        NULL,
2742                                 /*83*/ MatLoad_MPIAIJ,
2743                                        MatIsSymmetric_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        NULL,
2748                                 /*89*/ NULL,
2749                                        NULL,
2750                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2751                                        NULL,
2752                                        NULL,
2753                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        MatBindToCPU_MPIAIJ,
2758                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        MatConjugate_MPIAIJ,
2762                                        NULL,
2763                                 /*104*/MatSetValuesRow_MPIAIJ,
2764                                        MatRealPart_MPIAIJ,
2765                                        MatImaginaryPart_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                 /*109*/NULL,
2769                                        NULL,
2770                                        MatGetRowMin_MPIAIJ,
2771                                        NULL,
2772                                        MatMissingDiagonal_MPIAIJ,
2773                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2774                                        NULL,
2775                                        MatGetGhosts_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        MatGetMultiProcBlock_MPIAIJ,
2783                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2784                                        MatGetColumnReductions_MPIAIJ,
2785                                        MatInvertBlockDiagonal_MPIAIJ,
2786                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2787                                        MatCreateSubMatricesMPI_MPIAIJ,
2788                                 /*129*/NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2792                                        NULL,
2793                                 /*134*/NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                 /*139*/MatSetBlockSizes_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        MatFDColoringSetUp_MPIXAIJ,
2802                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2803                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2804                                 /*145*/NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        MatCreateGraph_Simple_AIJ,
2808                                        MatFilter_AIJ
2809 };
2810 
2811 /* ----------------------------------------------------------------------------------------*/
2812 
2813 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2814 {
2815   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2816 
2817   PetscFunctionBegin;
2818   PetscCall(MatStoreValues(aij->A));
2819   PetscCall(MatStoreValues(aij->B));
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2824 {
2825   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2826 
2827   PetscFunctionBegin;
2828   PetscCall(MatRetrieveValues(aij->A));
2829   PetscCall(MatRetrieveValues(aij->B));
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2834 {
2835   Mat_MPIAIJ     *b;
2836   PetscMPIInt    size;
2837 
2838   PetscFunctionBegin;
2839   PetscCall(PetscLayoutSetUp(B->rmap));
2840   PetscCall(PetscLayoutSetUp(B->cmap));
2841   b = (Mat_MPIAIJ*)B->data;
2842 
2843 #if defined(PETSC_USE_CTABLE)
2844   PetscCall(PetscTableDestroy(&b->colmap));
2845 #else
2846   PetscCall(PetscFree(b->colmap));
2847 #endif
2848   PetscCall(PetscFree(b->garray));
2849   PetscCall(VecDestroy(&b->lvec));
2850   PetscCall(VecScatterDestroy(&b->Mvctx));
2851 
2852   /* Because B may have been resized, we simply destroy it and create a new one each time */
2853   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2854   PetscCall(MatDestroy(&b->B));
2855   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2856   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2857   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
2858   PetscCall(MatSetType(b->B,MATSEQAIJ));
2859   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2860 
2861   if (!B->preallocated) {
2862     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2863     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2864     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
2865     PetscCall(MatSetType(b->A,MATSEQAIJ));
2866     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2867   }
2868 
2869   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
2870   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2871   B->preallocated  = PETSC_TRUE;
2872   B->was_assembled = PETSC_FALSE;
2873   B->assembled     = PETSC_FALSE;
2874   PetscFunctionReturn(0);
2875 }
2876 
2877 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2878 {
2879   Mat_MPIAIJ     *b;
2880 
2881   PetscFunctionBegin;
2882   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2883   PetscCall(PetscLayoutSetUp(B->rmap));
2884   PetscCall(PetscLayoutSetUp(B->cmap));
2885   b = (Mat_MPIAIJ*)B->data;
2886 
2887 #if defined(PETSC_USE_CTABLE)
2888   PetscCall(PetscTableDestroy(&b->colmap));
2889 #else
2890   PetscCall(PetscFree(b->colmap));
2891 #endif
2892   PetscCall(PetscFree(b->garray));
2893   PetscCall(VecDestroy(&b->lvec));
2894   PetscCall(VecScatterDestroy(&b->Mvctx));
2895 
2896   PetscCall(MatResetPreallocation(b->A));
2897   PetscCall(MatResetPreallocation(b->B));
2898   B->preallocated  = PETSC_TRUE;
2899   B->was_assembled = PETSC_FALSE;
2900   B->assembled = PETSC_FALSE;
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2905 {
2906   Mat            mat;
2907   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2908 
2909   PetscFunctionBegin;
2910   *newmat = NULL;
2911   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2912   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2913   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
2914   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2915   a       = (Mat_MPIAIJ*)mat->data;
2916 
2917   mat->factortype   = matin->factortype;
2918   mat->assembled    = matin->assembled;
2919   mat->insertmode   = NOT_SET_VALUES;
2920   mat->preallocated = matin->preallocated;
2921 
2922   a->size         = oldmat->size;
2923   a->rank         = oldmat->rank;
2924   a->donotstash   = oldmat->donotstash;
2925   a->roworiented  = oldmat->roworiented;
2926   a->rowindices   = NULL;
2927   a->rowvalues    = NULL;
2928   a->getrowactive = PETSC_FALSE;
2929 
2930   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2931   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2932 
2933   if (oldmat->colmap) {
2934 #if defined(PETSC_USE_CTABLE)
2935     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2936 #else
2937     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
2938     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
2939     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2940 #endif
2941   } else a->colmap = NULL;
2942   if (oldmat->garray) {
2943     PetscInt len;
2944     len  = oldmat->B->cmap->n;
2945     PetscCall(PetscMalloc1(len+1,&a->garray));
2946     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2947     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2948   } else a->garray = NULL;
2949 
2950   /* MatDuplicate() may be called with a non-assembled matrix;
2951      in fact, MatDuplicate() only requires the matrix to be preallocated.
2952      This may happen inside a DMCreateMatrix_Shell */
2953   if (oldmat->lvec) {
2954     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2955     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2956   }
2957   if (oldmat->Mvctx) {
2958     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2959     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2960   }
2961   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2962   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2963   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2964   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2965   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2966   *newmat = mat;
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2971 {
2972   PetscBool      isbinary, ishdf5;
2973 
2974   PetscFunctionBegin;
2975   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2976   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2977   /* force binary viewer to load .info file if it has not yet done so */
2978   PetscCall(PetscViewerSetUp(viewer));
2979   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2980   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2981   if (isbinary) {
2982     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2983   } else if (ishdf5) {
2984 #if defined(PETSC_HAVE_HDF5)
2985     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2986 #else
2987     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2988 #endif
2989   } else {
2990     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2991   }
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2996 {
2997   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2998   PetscInt       *rowidxs,*colidxs;
2999   PetscScalar    *matvals;
3000 
3001   PetscFunctionBegin;
3002   PetscCall(PetscViewerSetUp(viewer));
3003 
3004   /* read in matrix header */
3005   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3006   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3007   M  = header[1]; N = header[2]; nz = header[3];
3008   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3009   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3010   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3011 
3012   /* set block sizes from the viewer's .info file */
3013   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3014   /* set global sizes if not set already */
3015   if (mat->rmap->N < 0) mat->rmap->N = M;
3016   if (mat->cmap->N < 0) mat->cmap->N = N;
3017   PetscCall(PetscLayoutSetUp(mat->rmap));
3018   PetscCall(PetscLayoutSetUp(mat->cmap));
3019 
3020   /* check if the matrix sizes are correct */
3021   PetscCall(MatGetSize(mat,&rows,&cols));
3022   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3023 
3024   /* read in row lengths and build row indices */
3025   PetscCall(MatGetLocalSize(mat,&m,NULL));
3026   PetscCall(PetscMalloc1(m+1,&rowidxs));
3027   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3028   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3029   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3030   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3031   /* read in column indices and matrix values */
3032   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3033   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3034   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3035   /* store matrix indices and values */
3036   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3037   PetscCall(PetscFree(rowidxs));
3038   PetscCall(PetscFree2(colidxs,matvals));
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 /* Not scalable because of ISAllGather() unless getting all columns. */
3043 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3044 {
3045   IS             iscol_local;
3046   PetscBool      isstride;
3047   PetscMPIInt    lisstride=0,gisstride;
3048 
3049   PetscFunctionBegin;
3050   /* check if we are grabbing all columns */
3051   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3052 
3053   if (isstride) {
3054     PetscInt  start,len,mstart,mlen;
3055     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3056     PetscCall(ISGetLocalSize(iscol,&len));
3057     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3058     if (mstart == start && mlen-mstart == len) lisstride = 1;
3059   }
3060 
3061   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3062   if (gisstride) {
3063     PetscInt N;
3064     PetscCall(MatGetSize(mat,NULL,&N));
3065     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3066     PetscCall(ISSetIdentity(iscol_local));
3067     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3068   } else {
3069     PetscInt cbs;
3070     PetscCall(ISGetBlockSize(iscol,&cbs));
3071     PetscCall(ISAllGather(iscol,&iscol_local));
3072     PetscCall(ISSetBlockSize(iscol_local,cbs));
3073   }
3074 
3075   *isseq = iscol_local;
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 /*
3080  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3081  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3082 
3083  Input Parameters:
3084    mat - matrix
3085    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3086            i.e., mat->rstart <= isrow[i] < mat->rend
3087    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3088            i.e., mat->cstart <= iscol[i] < mat->cend
3089  Output Parameters:
3090    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3091    iscol_o - sequential column index set for retrieving mat->B
3092    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3093  */
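/*
   Illustrative example (hypothetical layout, for exposition only): with two processes where
   process 0 owns global columns 0..2 and selects iscol = {0,2} locally, process 1 owns columns
   3..5 and selects {3,5}, and process 0's off-diagonal block mat->B has ghost columns {3,4},
   process 0 obtains
     iscol_d = {0,2}   selected local columns of mat->A (global indices shifted by cstart)
     iscol_o = {0}     local column of mat->B for global column 3, the only ghost column present in iscol
     garray  = {2}     global column 3 is entry 2 of the concatenated iscol {0,2,3,5}
*/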
3094 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3095 {
3096   Vec            x,cmap;
3097   const PetscInt *is_idx;
3098   PetscScalar    *xarray,*cmaparray;
3099   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3100   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3101   Mat            B=a->B;
3102   Vec            lvec=a->lvec,lcmap;
3103   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3104   MPI_Comm       comm;
3105   VecScatter     Mvctx=a->Mvctx;
3106 
3107   PetscFunctionBegin;
3108   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3109   PetscCall(ISGetLocalSize(iscol,&ncols));
3110 
3111   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3112   PetscCall(MatCreateVecs(mat,&x,NULL));
3113   PetscCall(VecSet(x,-1.0));
3114   PetscCall(VecDuplicate(x,&cmap));
3115   PetscCall(VecSet(cmap,-1.0));
3116 
3117   /* Get start indices */
3118   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3119   isstart -= ncols;
3120   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3121 
3122   PetscCall(ISGetIndices(iscol,&is_idx));
3123   PetscCall(VecGetArray(x,&xarray));
3124   PetscCall(VecGetArray(cmap,&cmaparray));
3125   PetscCall(PetscMalloc1(ncols,&idx));
3126   for (i=0; i<ncols; i++) {
3127     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3128     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3129     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3130   }
3131   PetscCall(VecRestoreArray(x,&xarray));
3132   PetscCall(VecRestoreArray(cmap,&cmaparray));
3133   PetscCall(ISRestoreIndices(iscol,&is_idx));
3134 
3135   /* Get iscol_d */
3136   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
3137   PetscCall(ISGetBlockSize(iscol,&i));
3138   PetscCall(ISSetBlockSize(*iscol_d,i));
3139 
3140   /* Get isrow_d */
3141   PetscCall(ISGetLocalSize(isrow,&m));
3142   rstart = mat->rmap->rstart;
3143   PetscCall(PetscMalloc1(m,&idx));
3144   PetscCall(ISGetIndices(isrow,&is_idx));
3145   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3146   PetscCall(ISRestoreIndices(isrow,&is_idx));
3147 
3148   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
3149   PetscCall(ISGetBlockSize(isrow,&i));
3150   PetscCall(ISSetBlockSize(*isrow_d,i));
3151 
3152   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3153   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3154   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3155 
3156   PetscCall(VecDuplicate(lvec,&lcmap));
3157 
3158   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3159   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3160 
3161   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3162   /* off-process column indices */
3163   count = 0;
3164   PetscCall(PetscMalloc1(Bn,&idx));
3165   PetscCall(PetscMalloc1(Bn,&cmap1));
3166 
3167   PetscCall(VecGetArray(lvec,&xarray));
3168   PetscCall(VecGetArray(lcmap,&cmaparray));
3169   for (i=0; i<Bn; i++) {
3170     if (PetscRealPart(xarray[i]) > -1.0) {
3171       idx[count]     = i;                   /* local column index in off-diagonal part B */
3172       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3173       count++;
3174     }
3175   }
3176   PetscCall(VecRestoreArray(lvec,&xarray));
3177   PetscCall(VecRestoreArray(lcmap,&cmaparray));
3178 
3179   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3180   /* cannot ensure iscol_o has same blocksize as iscol! */
3181 
3182   PetscCall(PetscFree(idx));
3183   *garray = cmap1;
3184 
3185   PetscCall(VecDestroy(&x));
3186   PetscCall(VecDestroy(&cmap));
3187   PetscCall(VecDestroy(&lcmap));
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3192 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3193 {
3194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3195   Mat            M = NULL;
3196   MPI_Comm       comm;
3197   IS             iscol_d,isrow_d,iscol_o;
3198   Mat            Asub = NULL,Bsub = NULL;
3199   PetscInt       n;
3200 
3201   PetscFunctionBegin;
3202   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3203 
3204   if (call == MAT_REUSE_MATRIX) {
3205     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3206     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
3207     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3208 
3209     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
3210     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3211 
3212     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
3213     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3214 
3215     /* Update diagonal and off-diagonal portions of submat */
3216     asub = (Mat_MPIAIJ*)(*submat)->data;
3217     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
3218     PetscCall(ISGetLocalSize(iscol_o,&n));
3219     if (n) {
3220       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
3221     }
3222     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
3223     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
3224 
3225   } else { /* call == MAT_INITIAL_MATRIX */
3226     const PetscInt *garray;
3227     PetscInt        BsubN;
3228 
3229     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3230     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
3231 
3232     /* Create local submatrices Asub and Bsub */
3233     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
3234     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
3235 
3236     /* Create submatrix M */
3237     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
3238 
3239     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3240     asub = (Mat_MPIAIJ*)M->data;
3241 
3242     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3243     n = asub->B->cmap->N;
3244     if (BsubN > n) {
3245       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3246       const PetscInt *idx;
3247       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3248       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
3249 
3250       PetscCall(PetscMalloc1(n,&idx_new));
3251       j = 0;
3252       PetscCall(ISGetIndices(iscol_o,&idx));
3253       for (i=0; i<n; i++) {
3254         if (j >= BsubN) break;
3255         while (subgarray[i] > garray[j]) j++;
3256 
3257         if (subgarray[i] == garray[j]) {
3258           idx_new[i] = idx[j++];
3259         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3260       }
3261       PetscCall(ISRestoreIndices(iscol_o,&idx));
3262 
3263       PetscCall(ISDestroy(&iscol_o));
3264       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
3265 
3266     } else if (BsubN < n) {
3267       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3268     }
3269 
3270     PetscCall(PetscFree(garray));
3271     *submat = M;
3272 
3273     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3274     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
3275     PetscCall(ISDestroy(&isrow_d));
3276 
3277     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
3278     PetscCall(ISDestroy(&iscol_d));
3279 
3280     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
3281     PetscCall(ISDestroy(&iscol_o));
3282   }
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3287 {
3288   IS             iscol_local=NULL,isrow_d;
3289   PetscInt       csize;
3290   PetscInt       n,i,j,start,end;
3291   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3292   MPI_Comm       comm;
3293 
3294   PetscFunctionBegin;
3295   /* If isrow has same processor distribution as mat,
3296      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3297   if (call == MAT_REUSE_MATRIX) {
3298     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3299     if (isrow_d) {
3300       sameRowDist  = PETSC_TRUE;
3301       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3302     } else {
3303       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3304       if (iscol_local) {
3305         sameRowDist  = PETSC_TRUE;
3306         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3307       }
3308     }
3309   } else {
3310     /* Check if isrow has same processor distribution as mat */
3311     sameDist[0] = PETSC_FALSE;
3312     PetscCall(ISGetLocalSize(isrow,&n));
3313     if (!n) {
3314       sameDist[0] = PETSC_TRUE;
3315     } else {
3316       PetscCall(ISGetMinMax(isrow,&i,&j));
3317       PetscCall(MatGetOwnershipRange(mat,&start,&end));
3318       if (i >= start && j < end) {
3319         sameDist[0] = PETSC_TRUE;
3320       }
3321     }
3322 
3323     /* Check if iscol has same processor distribution as mat */
3324     sameDist[1] = PETSC_FALSE;
3325     PetscCall(ISGetLocalSize(iscol,&n));
3326     if (!n) {
3327       sameDist[1] = PETSC_TRUE;
3328     } else {
3329       PetscCall(ISGetMinMax(iscol,&i,&j));
3330       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
3331       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3332     }
3333 
3334     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3335     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
3336     sameRowDist = tsameDist[0];
3337   }
3338 
3339   if (sameRowDist) {
3340     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3341       /* isrow and iscol have same processor distribution as mat */
3342       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
3343       PetscFunctionReturn(0);
3344     } else { /* sameRowDist */
3345       /* isrow has same processor distribution as mat */
3346       if (call == MAT_INITIAL_MATRIX) {
3347         PetscBool sorted;
3348         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3349         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
3350         PetscCall(ISGetSize(iscol,&i));
3351         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3352 
3353         PetscCall(ISSorted(iscol_local,&sorted));
3354         if (sorted) {
3355           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3356           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
3357           PetscFunctionReturn(0);
3358         }
3359       } else { /* call == MAT_REUSE_MATRIX */
3360         IS iscol_sub;
3361         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3362         if (iscol_sub) {
3363           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
3364           PetscFunctionReturn(0);
3365         }
3366       }
3367     }
3368   }
3369 
3370   /* General case: iscol -> iscol_local which has global size of iscol */
3371   if (call == MAT_REUSE_MATRIX) {
3372     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
3373     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3374   } else {
3375     if (!iscol_local) {
3376       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3377     }
3378   }
3379 
3380   PetscCall(ISGetLocalSize(iscol,&csize));
3381   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
3382 
3383   if (call == MAT_INITIAL_MATRIX) {
3384     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
3385     PetscCall(ISDestroy(&iscol_local));
3386   }
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 /*@C
3391      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3392          and "off-diagonal" part of the matrix in CSR format.
3393 
3394    Collective
3395 
3396    Input Parameters:
3397 +  comm - MPI communicator
3398 .  A - "diagonal" portion of matrix
3399 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3400 -  garray - global index of B columns
3401 
3402    Output Parameter:
3403 .   mat - the matrix, with input A as its local diagonal matrix

3404    Level: advanced
3405 
3406    Notes:
3407        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3408        A becomes part of the output mat; B is destroyed by this routine. The user cannot use A or B afterwards.
3409 
3410 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3411 @*/
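/*
   A minimal usage sketch (illustrative only; Asub, Bsub and garray are assumed to have been built
   elsewhere, for example as in MatCreateSubMatrix_MPIAIJ_SameRowColDist() below):

     Mat            Asub,Bsub,M;   // sequential "diagonal" and "off-diagonal" blocks
     const PetscInt *garray;       // global column indices of Bsub's columns
     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,garray,&M));
     // Asub is now owned by M and Bsub has been destroyed; neither may be used afterwards
*/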
3412 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3413 {
3414   Mat_MPIAIJ        *maij;
3415   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3416   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3417   const PetscScalar *oa;
3418   Mat               Bnew;
3419   PetscInt          m,n,N;
3420 
3421   PetscFunctionBegin;
3422   PetscCall(MatCreate(comm,mat));
3423   PetscCall(MatGetSize(A,&m,&n));
3424   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3425   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3426   /* The check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3427   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3428 
3429   /* Get global columns of mat */
3430   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3431 
3432   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
3433   PetscCall(MatSetType(*mat,MATMPIAIJ));
3434   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3435   maij = (Mat_MPIAIJ*)(*mat)->data;
3436 
3437   (*mat)->preallocated = PETSC_TRUE;
3438 
3439   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3440   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3441 
3442   /* Set A as diagonal portion of *mat */
3443   maij->A = A;
3444 
3445   nz = oi[m];
3446   for (i=0; i<nz; i++) {
3447     col   = oj[i];
3448     oj[i] = garray[col];
3449   }
3450 
3451   /* Set Bnew as off-diagonal portion of *mat */
3452   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
3453   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
3454   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3455   bnew        = (Mat_SeqAIJ*)Bnew->data;
3456   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3457   maij->B     = Bnew;
3458 
3459   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3460 
3461   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3462   b->free_a       = PETSC_FALSE;
3463   b->free_ij      = PETSC_FALSE;
3464   PetscCall(MatDestroy(&B));
3465 
3466   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3467   bnew->free_a       = PETSC_TRUE;
3468   bnew->free_ij      = PETSC_TRUE;
3469 
3470   /* condense columns of maij->B */
3471   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3472   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3473   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3474   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
3475   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3480 
3481 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3482 {
3483   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3484   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3485   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3486   Mat            M,Msub,B=a->B;
3487   MatScalar      *aa;
3488   Mat_SeqAIJ     *aij;
3489   PetscInt       *garray = a->garray,*colsub,Ncols;
3490   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3491   IS             iscol_sub,iscmap;
3492   const PetscInt *is_idx,*cmap;
3493   PetscBool      allcolumns=PETSC_FALSE;
3494   MPI_Comm       comm;
3495 
3496   PetscFunctionBegin;
3497   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3498   if (call == MAT_REUSE_MATRIX) {
3499     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3500     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3501     PetscCall(ISGetLocalSize(iscol_sub,&count));
3502 
3503     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3504     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3505 
3506     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3507     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3508 
3509     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3510 
3511   } else { /* call == MAT_INITIAL_MATRIX */
3512     PetscBool flg;
3513 
3514     PetscCall(ISGetLocalSize(iscol,&n));
3515     PetscCall(ISGetSize(iscol,&Ncols));
3516 
3517     /* (1) iscol -> nonscalable iscol_local */
3518     /* Check for special case: each processor gets entire matrix columns */
3519     PetscCall(ISIdentity(iscol_local,&flg));
3520     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3521     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3522     if (allcolumns) {
3523       iscol_sub = iscol_local;
3524       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3525       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3526 
3527     } else {
3528       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3529       PetscInt *idx,*cmap1,k;
3530       PetscCall(PetscMalloc1(Ncols,&idx));
3531       PetscCall(PetscMalloc1(Ncols,&cmap1));
3532       PetscCall(ISGetIndices(iscol_local,&is_idx));
3533       count = 0;
3534       k     = 0;
3535       for (i=0; i<Ncols; i++) {
3536         j = is_idx[i];
3537         if (j >= cstart && j < cend) {
3538           /* diagonal part of mat */
3539           idx[count]     = j;
3540           cmap1[count++] = i; /* column index in submat */
3541         } else if (Bn) {
3542           /* off-diagonal part of mat */
3543           if (j == garray[k]) {
3544             idx[count]     = j;
3545             cmap1[count++] = i;  /* column index in submat */
3546           } else if (j > garray[k]) {
3547             while (j > garray[k] && k < Bn-1) k++;
3548             if (j == garray[k]) {
3549               idx[count]     = j;
3550               cmap1[count++] = i; /* column index in submat */
3551             }
3552           }
3553         }
3554       }
3555       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3556 
3557       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3558       PetscCall(ISGetBlockSize(iscol,&cbs));
3559       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3560 
3561       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3562     }
3563 
3564     /* (3) Create sequential Msub */
3565     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3566   }
3567 
3568   PetscCall(ISGetLocalSize(iscol_sub,&count));
3569   aij  = (Mat_SeqAIJ*)(Msub)->data;
3570   ii   = aij->i;
3571   PetscCall(ISGetIndices(iscmap,&cmap));
3572 
3573   /*
3574       m - number of local rows
3575       Ncols - number of columns (same on all processors)
3576       rstart - first row in new global matrix generated
3577   */
3578   PetscCall(MatGetSize(Msub,&m,NULL));
3579 
3580   if (call == MAT_INITIAL_MATRIX) {
3581     /* (4) Create parallel newmat */
3582     PetscMPIInt    rank,size;
3583     PetscInt       csize;
3584 
3585     PetscCallMPI(MPI_Comm_size(comm,&size));
3586     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3587 
3588     /*
3589         Determine the number of non-zeros in the diagonal and off-diagonal
3590         portions of the matrix in order to do correct preallocation
3591     */
3592 
3593     /* first get start and end of "diagonal" columns */
3594     PetscCall(ISGetLocalSize(iscol,&csize));
3595     if (csize == PETSC_DECIDE) {
3596       PetscCall(ISGetSize(isrow,&mglobal));
3597       if (mglobal == Ncols) { /* square matrix */
3598         nlocal = m;
3599       } else {
3600         nlocal = Ncols/size + ((Ncols % size) > rank);
3601       }
3602     } else {
3603       nlocal = csize;
3604     }
3605     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3606     rstart = rend - nlocal;
3607     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3608 
3609     /* next, compute all the lengths */
3610     jj    = aij->j;
3611     PetscCall(PetscMalloc1(2*m+1,&dlens));
3612     olens = dlens + m;
3613     for (i=0; i<m; i++) {
3614       jend = ii[i+1] - ii[i];
3615       olen = 0;
3616       dlen = 0;
3617       for (j=0; j<jend; j++) {
3618         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3619         else dlen++;
3620         jj++;
3621       }
3622       olens[i] = olen;
3623       dlens[i] = dlen;
3624     }
3625 
3626     PetscCall(ISGetBlockSize(isrow,&bs));
3627     PetscCall(ISGetBlockSize(iscol,&cbs));
3628 
3629     PetscCall(MatCreate(comm,&M));
3630     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3631     PetscCall(MatSetBlockSizes(M,bs,cbs));
3632     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3633     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3634     PetscCall(PetscFree(dlens));
3635 
3636   } else { /* call == MAT_REUSE_MATRIX */
3637     M    = *newmat;
3638     PetscCall(MatGetLocalSize(M,&i,NULL));
3639     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3640     PetscCall(MatZeroEntries(M));
3641     /*
3642          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3643        rather than the slower MatSetValues().
3644     */
3645     M->was_assembled = PETSC_TRUE;
3646     M->assembled     = PETSC_FALSE;
3647   }
3648 
3649   /* (5) Set values of Msub to *newmat */
3650   PetscCall(PetscMalloc1(count,&colsub));
3651   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3652 
3653   jj   = aij->j;
3654   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3655   for (i=0; i<m; i++) {
3656     row = rstart + i;
3657     nz  = ii[i+1] - ii[i];
3658     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3659     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3660     jj += nz; aa += nz;
3661   }
3662   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3663   PetscCall(ISRestoreIndices(iscmap,&cmap));
3664 
3665   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3666   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3667 
3668   PetscCall(PetscFree(colsub));
3669 
3670   /* save Msub, iscol_sub and iscmap used in processor for next request */
3671   if (call == MAT_INITIAL_MATRIX) {
3672     *newmat = M;
3673     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3674     PetscCall(MatDestroy(&Msub));
3675 
3676     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3677     PetscCall(ISDestroy(&iscol_sub));
3678 
3679     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3680     PetscCall(ISDestroy(&iscmap));
3681 
3682     if (iscol_local) {
3683       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3684       PetscCall(ISDestroy(&iscol_local));
3685     }
3686   }
3687   PetscFunctionReturn(0);
3688 }
3689 
3690 /*
3691     Not great since it makes two copies of the submatrix: first a SeqAIJ
3692   on each process, and then the final result by concatenating the local matrices.
3693   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3694 
3695   Note: This requires a sequential iscol with all indices.
3696 */
3697 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3698 {
3699   PetscMPIInt    rank,size;
3700   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3701   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3702   Mat            M,Mreuse;
3703   MatScalar      *aa,*vwork;
3704   MPI_Comm       comm;
3705   Mat_SeqAIJ     *aij;
3706   PetscBool      colflag,allcolumns=PETSC_FALSE;
3707 
3708   PetscFunctionBegin;
3709   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3710   PetscCallMPI(MPI_Comm_rank(comm,&rank));
3711   PetscCallMPI(MPI_Comm_size(comm,&size));
3712 
3713   /* Check for special case: each processor gets entire matrix columns */
3714   PetscCall(ISIdentity(iscol,&colflag));
3715   PetscCall(ISGetLocalSize(iscol,&n));
3716   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3717   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3718 
3719   if (call ==  MAT_REUSE_MATRIX) {
3720     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3721     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3722     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3723   } else {
3724     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3725   }
3726 
3727   /*
3728       m - number of local rows
3729       n - number of columns (same on all processors)
3730       rstart - first row in new global matrix generated
3731   */
3732   PetscCall(MatGetSize(Mreuse,&m,&n));
3733   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3734   if (call == MAT_INITIAL_MATRIX) {
3735     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3736     ii  = aij->i;
3737     jj  = aij->j;
3738 
3739     /*
3740         Determine the number of non-zeros in the diagonal and off-diagonal
3741         portions of the matrix in order to do correct preallocation
3742     */
3743 
3744     /* first get start and end of "diagonal" columns */
3745     if (csize == PETSC_DECIDE) {
3746       PetscCall(ISGetSize(isrow,&mglobal));
3747       if (mglobal == n) { /* square matrix */
3748         nlocal = m;
3749       } else {
3750         nlocal = n/size + ((n % size) > rank);
3751       }
3752     } else {
3753       nlocal = csize;
3754     }
3755     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3756     rstart = rend - nlocal;
3757     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3758 
3759     /* next, compute all the lengths */
3760     PetscCall(PetscMalloc1(2*m+1,&dlens));
3761     olens = dlens + m;
3762     for (i=0; i<m; i++) {
3763       jend = ii[i+1] - ii[i];
3764       olen = 0;
3765       dlen = 0;
3766       for (j=0; j<jend; j++) {
3767         if (*jj < rstart || *jj >= rend) olen++;
3768         else dlen++;
3769         jj++;
3770       }
3771       olens[i] = olen;
3772       dlens[i] = dlen;
3773     }
3774     PetscCall(MatCreate(comm,&M));
3775     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3776     PetscCall(MatSetBlockSizes(M,bs,cbs));
3777     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3778     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3779     PetscCall(PetscFree(dlens));
3780   } else {
3781     PetscInt ml,nl;
3782 
3783     M    = *newmat;
3784     PetscCall(MatGetLocalSize(M,&ml,&nl));
3785     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3786     PetscCall(MatZeroEntries(M));
3787     /*
3788          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3789        rather than the slower MatSetValues().
3790     */
3791     M->was_assembled = PETSC_TRUE;
3792     M->assembled     = PETSC_FALSE;
3793   }
3794   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3795   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3796   ii   = aij->i;
3797   jj   = aij->j;
3798 
3799   /* trigger copy to CPU if needed */
3800   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3801   for (i=0; i<m; i++) {
3802     row   = rstart + i;
3803     nz    = ii[i+1] - ii[i];
3804     cwork = jj; jj += nz;
3805     vwork = aa; aa += nz;
3806     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3807   }
3808   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3809 
3810   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3811   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3812   *newmat = M;
3813 
3814   /* save submatrix used in processor for next request */
3815   if (call ==  MAT_INITIAL_MATRIX) {
3816     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3817     PetscCall(MatDestroy(&Mreuse));
3818   }
3819   PetscFunctionReturn(0);
3820 }
3821 
3822 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3823 {
3824   PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
3825   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3826   const PetscInt *JJ;
3827   PetscBool      nooffprocentries;
3828   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;
3829 
3830   PetscFunctionBegin;
3831   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3832 
3833   PetscCall(PetscLayoutSetUp(B->rmap));
3834   PetscCall(PetscLayoutSetUp(B->cmap));
3835   m      = B->rmap->n;
3836   cstart = B->cmap->rstart;
3837   cend   = B->cmap->rend;
3838   rstart = B->rmap->rstart;
3839 
3840   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3841 
3842   if (PetscDefined(USE_DEBUG)) {
3843     for (i=0; i<m; i++) {
3844       nnz = Ii[i+1]- Ii[i];
3845       JJ  = J + Ii[i];
3846       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3847       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3848       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3849     }
3850   }
3851 
3852   for (i=0; i<m; i++) {
3853     nnz     = Ii[i+1]- Ii[i];
3854     JJ      = J + Ii[i];
3855     nnz_max = PetscMax(nnz_max,nnz);
3856     d       = 0;
3857     for (j=0; j<nnz; j++) {
3858       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3859     }
3860     d_nnz[i] = d;
3861     o_nnz[i] = nnz - d;
3862   }
3863   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3864   PetscCall(PetscFree2(d_nnz,o_nnz));
3865 
3866   for (i=0; i<m; i++) {
3867     ii   = i + rstart;
3868     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3869   }
3870   nooffprocentries    = B->nooffprocentries;
3871   B->nooffprocentries = PETSC_TRUE;
3872   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3873   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3874   B->nooffprocentries = nooffprocentries;
3875 
3876   /* count number of entries below block diagonal */
3877   PetscCall(PetscFree(Aij->ld));
3878   PetscCall(PetscCalloc1(m,&ld));
3879   Aij->ld = ld;
3880   for (i=0; i<m; i++) {
3881     nnz   = Ii[i+1] - Ii[i];
3882     j     = 0;
3883     while (j < nnz && J[j] < cstart) j++;
3884     ld[i] = j;
3885     J    += nnz;
3886   }
3887 
3888   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 /*@
3893    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3894    (the default parallel PETSc format).
3895 
3896    Collective
3897 
3898    Input Parameters:
3899 +  B - the matrix
3900 .  i - the indices into j for the start of each local row (starts with zero)
3901 .  j - the column indices for each local row (starts with zero)
3902 -  v - optional values in the matrix
3903 
3904    Level: developer
3905 
3906    Notes:
3907        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3908      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3909      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3910 
3911        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3912 
3913        The format used for the sparse matrix input is equivalent to a
3914     row-major ordering, i.e., for the following matrix the input data expected is
3915     as shown below.
3916 
3917 $        1 0 0
3918 $        2 0 3     P0
3919 $       -------
3920 $        4 5 6     P1
3921 $
3922 $     Process0 [P0]: rows_owned=[0,1]
3923 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3924 $        j =  {0,0,2}  [size = 3]
3925 $        v =  {1,2,3}  [size = 3]
3926 $
3927 $     Process1 [P1]: rows_owned=[2]
3928 $        i =  {0,3}    [size = nrow+1  = 1+1]
3929 $        j =  {0,1,2}  [size = 3]
3930 $        v =  {4,5,6}  [size = 3]
3931 
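   For illustration, a minimal sketch of the call process 0 might make for the example above (assuming B was created with MatCreate(), MatSetSizes(B,2,PETSC_DECIDE,3,3) and MatSetType(B,MATMPIAIJ); error checking omitted):

$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
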
3932 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3933           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3934 @*/
3935 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3936 {
3937   PetscFunctionBegin;
3938   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3939   PetscFunctionReturn(0);
3940 }
3941 
3942 /*@C
3943    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3944    (the default parallel PETSc format).  For good matrix assembly performance
3945    the user should preallocate the matrix storage by setting the parameters
3946    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3947    performance can be increased by more than a factor of 50.
3948 
3949    Collective
3950 
3951    Input Parameters:
3952 +  B - the matrix
3953 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3954            (same value is used for all local rows)
3955 .  d_nnz - array containing the number of nonzeros in the various rows of the
3956            DIAGONAL portion of the local submatrix (possibly different for each row)
3957            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3958            The size of this array is equal to the number of local rows, i.e 'm'.
3959            For matrices that will be factored, you must leave room for (and set)
3960            the diagonal entry even if it is zero.
3961 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3962            submatrix (same value is used for all local rows).
3963 -  o_nnz - array containing the number of nonzeros in the various rows of the
3964            OFF-DIAGONAL portion of the local submatrix (possibly different for
3965            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3966            structure. The size of this array is equal to the number
3967            of local rows, i.e 'm'.
3968 
3969    If the *_nnz parameter is given then the *_nz parameter is ignored
3970 
3971    The AIJ format (also called the Yale sparse matrix format or
3972    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3973    storage.  The stored row and column indices begin with zero.
3974    See Users-Manual: ch_mat for details.
3975 
3976    The parallel matrix is partitioned such that the first m0 rows belong to
3977    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3978    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3979 
3980    The DIAGONAL portion of the local submatrix of a processor can be defined
3981    as the submatrix which is obtained by extracting the part corresponding to
3982    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3983    first row that belongs to the processor, r2 is the last row belonging to
3984    this processor, and c1-c2 is the range of indices of the local part of a
3985    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3986    common case of a square matrix, the row and column ranges are the same and
3987    the DIAGONAL part is also square. The remaining portion of the local
3988    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3989 
3990    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3991 
3992    You can call MatGetInfo() to get information on how effective the preallocation was;
3993    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3994    You can also run with the option -info and look for messages with the string
3995    malloc in them to see if additional memory allocation was needed.
3996 
3997    Example usage:
3998 
3999    Consider the following 8x8 matrix with 34 non-zero values that is
4000    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4001    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4002    as follows:
4003 
4004 .vb
4005             1  2  0  |  0  3  0  |  0  4
4006     Proc0   0  5  6  |  7  0  0  |  8  0
4007             9  0 10  | 11  0  0  | 12  0
4008     -------------------------------------
4009            13  0 14  | 15 16 17  |  0  0
4010     Proc1   0 18  0  | 19 20 21  |  0  0
4011             0  0  0  | 22 23  0  | 24  0
4012     -------------------------------------
4013     Proc2  25 26 27  |  0  0 28  | 29  0
4014            30  0  0  | 31 32 33  |  0 34
4015 .ve
4016 
4017    This can be represented as a collection of submatrices as:
4018 
4019 .vb
4020       A B C
4021       D E F
4022       G H I
4023 .ve
4024 
4025    Where the submatrices A,B,C are owned by proc0, D,E,F are
4026    owned by proc1, G,H,I are owned by proc2.
4027 
4028    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4029    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4030    The 'M','N' parameters are 8,8, and have the same values on all procs.
4031 
4032    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4033    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4034    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4035    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4036    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4037    matrix, and [DF] as another SeqAIJ matrix.
4038 
4039    When d_nz, o_nz parameters are specified, d_nz storage elements are
4040    allocated for every row of the local diagonal submatrix, and o_nz
4041    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4042    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4043    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4044    In this case, the values of d_nz,o_nz are:
4045 .vb
4046      proc0 : d_nz = 2, o_nz = 2
4047      proc1 : d_nz = 3, o_nz = 2
4048      proc2 : d_nz = 1, o_nz = 4
4049 .ve
4050    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4051    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4052    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4053    34 values.
4054 
4055    When d_nnz, o_nnz parameters are specified, the storage is specified
4056    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4057    In the above case the values for d_nnz,o_nnz are:
4058 .vb
4059      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4060      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4061      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4062 .ve
4063    Here the space allocated is the sum of all the above values, i.e. 34, and
4064    hence pre-allocation is perfect.
4065 
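   For illustration, a minimal sketch of the call process 0 might make for the example above (assuming B was created with MatCreate(), MatSetSizes(B,3,3,8,8) and MatSetType(B,MATMPIAIJ); the other processes pass their own local sizes and arrays):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
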
4066    Level: intermediate
4067 
4068 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4069           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4070 @*/
4071 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4072 {
4073   PetscFunctionBegin;
4074   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4075   PetscValidType(B,1);
4076   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4077   PetscFunctionReturn(0);
4078 }
4079 
4080 /*@
4081      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4082          rows in standard CSR format.
4083 
4084    Collective
4085 
4086    Input Parameters:
4087 +  comm - MPI communicator
4088 .  m - number of local rows (Cannot be PETSC_DECIDE)
4089 .  n - This value should be the same as the local size used in creating the
4090        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4091        calculated if N is given) For square matrices n is almost always m.
4092 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4093 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4094 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4095 .   j - column indices
4096 -   a - matrix values
4097 
4098    Output Parameter:
4099 .   mat - the matrix
4100 
4101    Level: intermediate
4102 
4103    Notes:
4104        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4105      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4106      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4107 
4108        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4109 
4110        The format used for the sparse matrix input is equivalent to a
4111     row-major ordering, i.e., for the following matrix the input data expected is
4112     as shown below.
4113
4114        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4115 
4116 $        1 0 0
4117 $        2 0 3     P0
4118 $       -------
4119 $        4 5 6     P1
4120 $
4121 $     Process0 [P0]: rows_owned=[0,1]
4122 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4123 $        j =  {0,0,2}  [size = 3]
4124 $        v =  {1,2,3}  [size = 3]
4125 $
4126 $     Process1 [P1]: rows_owned=[2]
4127 $        i =  {0,3}    [size = nrow+1  = 1+1]
4128 $        j =  {0,1,2}  [size = 3]
4129 $        v =  {4,5,6}  [size = 3]
4130 
4131 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
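   For illustration, a minimal sketch of the call process 0 might make for the example above (error checking omitted; the global sizes could also be passed explicitly instead of PETSC_DETERMINE):

$     Mat         A;
$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
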
4132           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4133 @*/
4134 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4135 {
4136   PetscFunctionBegin;
4137   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4138   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4139   PetscCall(MatCreate(comm,mat));
4140   PetscCall(MatSetSizes(*mat,m,n,M,N));
4141   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4142   PetscCall(MatSetType(*mat,MATMPIAIJ));
4143   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4144   PetscFunctionReturn(0);
4145 }
4146 
4147 /*@
4148      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4149          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to MatCreateMPIAIJWithArrays()
4150 
4151      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4152 
4153    Collective
4154 
4155    Input Parameters:
4156 +  mat - the matrix
4157 .  m - number of local rows (Cannot be PETSC_DECIDE)
4158 .  n - This value should be the same as the local size used in creating the
4159        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4160        calculated if N is given) For square matrices n is almost always m.
4161 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4162 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4163 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4164 .  J - column indices
4165 -  v - matrix values
4166 
4167    Level: intermediate
4168 
4169 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4170           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4171 @*/
4172 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4173 {
4174   PetscInt       nnz,i;
4175   PetscBool      nooffprocentries;
4176   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4177   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4178   PetscScalar    *ad,*ao;
4179   PetscInt       ldi,Iii,md;
4180   const PetscInt *Adi = Ad->i;
4181   PetscInt       *ld = Aij->ld;
4182 
4183   PetscFunctionBegin;
4184   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4185   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4186   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4187   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4188 
4189   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4190   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4191 
4192   for (i=0; i<m; i++) {
4193     nnz  = Ii[i+1]- Ii[i];
4194     Iii  = Ii[i];
4195     ldi  = ld[i];
4196     md   = Adi[i+1]-Adi[i];
4197     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4198     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4199     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4200     ad  += md;
4201     ao  += nnz - md;
4202   }
4203   nooffprocentries      = mat->nooffprocentries;
4204   mat->nooffprocentries = PETSC_TRUE;
4205   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4206   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4207   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4208   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4209   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4210   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4211   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4212   mat->nooffprocentries = nooffprocentries;
4213   PetscFunctionReturn(0);
4214 }
4215 
4216 /*@
4217      MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values
4218 
4219    Collective
4220 
4221    Input Parameters:
4222 +  mat - the matrix
4223 -  v - matrix values, stored by row
4224 
4225    Level: intermediate
4226 
4227    Notes:
4228    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
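
   For illustration, a minimal sketch of an update on process 0 of the matrix A built in the MatCreateMPIAIJWithArrays() example (vnew is a hypothetical array holding this process's new values, laid out exactly like the original v):
.vb
     PetscScalar vnew[] = {10.0,20.0,30.0};
     MatUpdateMPIAIJWithArray(A,vnew);
.ve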
4229 
4230 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4231           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4232 @*/
4233 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
4234 {
4235   PetscInt       nnz,i,m;
4236   PetscBool      nooffprocentries;
4237   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4238   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4239   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4240   PetscScalar    *ad,*ao;
4241   const PetscInt *Adi = Ad->i,*Adj = Ao->i;
4242   PetscInt       ldi,Iii,md;
4243   PetscInt       *ld = Aij->ld;
4244 
4245   PetscFunctionBegin;
4246   m = mat->rmap->n;
4247 
4248   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4249   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4250   Iii = 0;
4251   for (i=0; i<m; i++) {
4252     nnz  = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];
4253     ldi  = ld[i];
4254     md   = Adi[i+1]-Adi[i];
4255     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4256     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4257     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4258     ad  += md;
4259     ao  += nnz - md;
4260     Iii += nnz;
4261   }
4262   nooffprocentries      = mat->nooffprocentries;
4263   mat->nooffprocentries = PETSC_TRUE;
4264   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4265   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4266   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4267   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4268   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4269   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4270   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4271   mat->nooffprocentries = nooffprocentries;
4272   PetscFunctionReturn(0);
4273 }
4274 
4275 /*@C
4276    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4277    (the default parallel PETSc format).  For good matrix assembly performance
4278    the user should preallocate the matrix storage by setting the parameters
4279    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4280    performance can be increased by more than a factor of 50.
4281 
4282    Collective
4283 
4284    Input Parameters:
4285 +  comm - MPI communicator
4286 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4287            This value should be the same as the local size used in creating the
4288            y vector for the matrix-vector product y = Ax.
4289 .  n - This value should be the same as the local size used in creating the
4290        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4291        calculated if N is given) For square matrices n is almost always m.
4292 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4293 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4294 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4295            (same value is used for all local rows)
4296 .  d_nnz - array containing the number of nonzeros in the various rows of the
4297            DIAGONAL portion of the local submatrix (possibly different for each row)
4298            or NULL, if d_nz is used to specify the nonzero structure.
4299            The size of this array is equal to the number of local rows, i.e 'm'.
4300 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4301            submatrix (same value is used for all local rows).
4302 -  o_nnz - array containing the number of nonzeros in the various rows of the
4303            OFF-DIAGONAL portion of the local submatrix (possibly different for
4304            each row) or NULL, if o_nz is used to specify the nonzero
4305            structure. The size of this array is equal to the number
4306            of local rows, i.e 'm'.
4307 
4308    Output Parameter:
4309 .  A - the matrix
4310 
4311    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4312    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4313    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4314 
4315    Notes:
4316    If the *_nnz parameter is given then the *_nz parameter is ignored
4317 
4318    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4319    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4320    storage requirements for this matrix.
4321 
4322    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4323    processor then it must be used on all processors that share the object for
4324    that argument.
4325 
4326    The user MUST specify either the local or global matrix dimensions
4327    (possibly both).
4328 
4329    The parallel matrix is partitioned across processors such that the
4330    first m0 rows belong to process 0, the next m1 rows belong to
4331    process 1, the next m2 rows belong to process 2, etc., where
4332    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4333    values corresponding to an [m x N] submatrix.
4334 
4335    The columns are logically partitioned with the n0 columns belonging
4336    to the 0th partition, the next n1 columns belonging to the next
4337    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4338 
4339    The DIAGONAL portion of the local submatrix on any given processor
4340    is the submatrix corresponding to the rows and columns m,n
4341    corresponding to the given processor, i.e. the diagonal matrix on
4342    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4343    etc. The remaining portion of the local submatrix [m x (N-n)]
4344    constitutes the OFF-DIAGONAL portion. The example below better
4345    illustrates this concept.
4346 
4347    For a square global matrix we define each processor's diagonal portion
4348    to be its local rows and the corresponding columns (a square submatrix);
4349    each processor's off-diagonal portion encompasses the remainder of the
4350    local matrix (a rectangular submatrix).
4351 
4352    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4353 
4354    When calling this routine with a single process communicator, a matrix of
4355    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4356    type of communicator, use the construction mechanism
4357 .vb
4358      MatCreate(...,&A);
4359      MatSetType(A,MATMPIAIJ);
4360      MatSetSizes(A, m,n,M,N);
4361      MatMPIAIJSetPreallocation(A,...);
4362 .ve
4365 
4366    By default, this format uses inodes (identical nodes) when possible.
4367    We search for consecutive rows with the same nonzero structure, thereby
4368    reusing matrix information to achieve increased efficiency.
4369 
4370    Options Database Keys:
4371 +  -mat_no_inode  - Do not use inodes
4372 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4373 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4374         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4375         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4376 
4377    Example usage:
4378 
4379    Consider the following 8x8 matrix with 34 non-zero values that is
4380    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4381    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4382    as follows
4383 
4384 .vb
4385             1  2  0  |  0  3  0  |  0  4
4386     Proc0   0  5  6  |  7  0  0  |  8  0
4387             9  0 10  | 11  0  0  | 12  0
4388     -------------------------------------
4389            13  0 14  | 15 16 17  |  0  0
4390     Proc1   0 18  0  | 19 20 21  |  0  0
4391             0  0  0  | 22 23  0  | 24  0
4392     -------------------------------------
4393     Proc2  25 26 27  |  0  0 28  | 29  0
4394            30  0  0  | 31 32 33  |  0 34
4395 .ve
4396 
4397    This can be represented as a collection of submatrices as
4398 
4399 .vb
4400       A B C
4401       D E F
4402       G H I
4403 .ve
4404 
4405    Where the submatrices A,B,C are owned by proc0, D,E,F are
4406    owned by proc1, G,H,I are owned by proc2.
4407 
4408    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4409    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4410    The 'M','N' parameters are 8,8, and have the same values on all procs.
4411 
4412    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4413    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4414    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4415    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4416    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4417    matrix, and [DF] as another SeqAIJ matrix.
4418 
4419    When d_nz, o_nz parameters are specified, d_nz storage elements are
4420    allocated for every row of the local diagonal submatrix, and o_nz
4421    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4422    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4423    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4424    In this case, the values of d_nz,o_nz are
4425 .vb
4426      proc0 : d_nz = 2, o_nz = 2
4427      proc1 : d_nz = 3, o_nz = 2
4428      proc2 : d_nz = 1, o_nz = 4
4429 .ve
4430    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4431    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4432    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4433    34 values.
4434 
4435    When d_nnz, o_nnz parameters are specified, the storage is specified
4436    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4437    In the above case the values for d_nnz,o_nnz are
4438 .vb
4439      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4440      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4441      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4442 .ve
4443    Here the space allocated is the sum of all the above values, i.e. 34, and
4444    hence pre-allocation is perfect.
4445 
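   For illustration, a minimal sketch of the call process 0 might make for the 8x8 example above (the other processes pass their own local sizes and d_nnz/o_nnz arrays; error checking omitted):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
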
4446    Level: intermediate
4447 
4448 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4449           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4450 @*/
4451 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4452 {
4453   PetscMPIInt    size;
4454 
4455   PetscFunctionBegin;
4456   PetscCall(MatCreate(comm,A));
4457   PetscCall(MatSetSizes(*A,m,n,M,N));
4458   PetscCallMPI(MPI_Comm_size(comm,&size));
4459   if (size > 1) {
4460     PetscCall(MatSetType(*A,MATMPIAIJ));
4461     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4462   } else {
4463     PetscCall(MatSetType(*A,MATSEQAIJ));
4464     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4465   }
4466   PetscFunctionReturn(0);
4467 }
4468 
4469 /*@C
4470   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4471 
4472   Not collective
4473 
4474   Input Parameter:
4475 . A - The MPIAIJ matrix
4476 
4477   Output Parameters:
4478 + Ad - The local diagonal block as a SeqAIJ matrix
4479 . Ao - The local off-diagonal block as a SeqAIJ matrix
4480 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4481 
4482   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4483   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4484   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4485   local column numbers to global column numbers in the original matrix.
4486 
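  For illustration, a minimal sketch of typical usage (Ad, Ao and colmap are internal objects of A and must not be destroyed or freed by the caller):
.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
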
4487   Level: intermediate
4488 
4489 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4490 @*/
4491 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4492 {
4493   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4494   PetscBool      flg;
4495 
4496   PetscFunctionBegin;
4497   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4498   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4499   if (Ad)     *Ad     = a->A;
4500   if (Ao)     *Ao     = a->B;
4501   if (colmap) *colmap = a->garray;
4502   PetscFunctionReturn(0);
4503 }
4504 
4505 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4506 {
4507   PetscInt       m,N,i,rstart,nnz,Ii;
4508   PetscInt       *indx;
4509   PetscScalar    *values;
4510   MatType        rootType;
4511 
4512   PetscFunctionBegin;
4513   PetscCall(MatGetSize(inmat,&m,&N));
4514   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4515     PetscInt       *dnz,*onz,sum,bs,cbs;
4516 
4517     if (n == PETSC_DECIDE) {
4518       PetscCall(PetscSplitOwnership(comm,&n,&N));
4519     }
4520     /* Check sum(n) = N */
4521     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4522     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4523 
4524     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4525     rstart -= m;
4526 
4527     MatPreallocateBegin(comm,m,n,dnz,onz);
4528     for (i=0; i<m; i++) {
4529       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4530       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4531       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4532     }
4533 
4534     PetscCall(MatCreate(comm,outmat));
4535     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4536     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4537     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4538     PetscCall(MatGetRootType_Private(inmat,&rootType));
4539     PetscCall(MatSetType(*outmat,rootType));
4540     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4541     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4542     MatPreallocateEnd(dnz,onz);
4543     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4544   }
4545 
4546   /* numeric phase */
4547   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4548   for (i=0; i<m; i++) {
4549     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4550     Ii   = i + rstart;
4551     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4552     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4553   }
4554   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4555   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4556   PetscFunctionReturn(0);
4557 }
4558 
4559 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4560 {
4561   PetscMPIInt       rank;
4562   PetscInt          m,N,i,rstart,nnz;
4563   size_t            len;
4564   const PetscInt    *indx;
4565   PetscViewer       out;
4566   char              *name;
4567   Mat               B;
4568   const PetscScalar *values;
4569 
4570   PetscFunctionBegin;
4571   PetscCall(MatGetLocalSize(A,&m,NULL));
4572   PetscCall(MatGetSize(A,NULL,&N));
4573   /* Should this be the type of the diagonal block of A? */
4574   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4575   PetscCall(MatSetSizes(B,m,N,m,N));
4576   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4577   PetscCall(MatSetType(B,MATSEQAIJ));
4578   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4579   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4580   for (i=0; i<m; i++) {
4581     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4582     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4583     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4584   }
4585   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4586   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4587 
4588   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4589   PetscCall(PetscStrlen(outfile,&len));
4590   PetscCall(PetscMalloc1(len+6,&name));
4591   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4592   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4593   PetscCall(PetscFree(name));
4594   PetscCall(MatView(B,out));
4595   PetscCall(PetscViewerDestroy(&out));
4596   PetscCall(MatDestroy(&B));
4597   PetscFunctionReturn(0);
4598 }
4599 
4600 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4601 {
4602   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4603 
4604   PetscFunctionBegin;
4605   if (!merge) PetscFunctionReturn(0);
4606   PetscCall(PetscFree(merge->id_r));
4607   PetscCall(PetscFree(merge->len_s));
4608   PetscCall(PetscFree(merge->len_r));
4609   PetscCall(PetscFree(merge->bi));
4610   PetscCall(PetscFree(merge->bj));
4611   PetscCall(PetscFree(merge->buf_ri[0]));
4612   PetscCall(PetscFree(merge->buf_ri));
4613   PetscCall(PetscFree(merge->buf_rj[0]));
4614   PetscCall(PetscFree(merge->buf_rj));
4615   PetscCall(PetscFree(merge->coi));
4616   PetscCall(PetscFree(merge->coj));
4617   PetscCall(PetscFree(merge->owners_co));
4618   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4619   PetscCall(PetscFree(merge));
4620   PetscFunctionReturn(0);
4621 }
4622 
4623 #include <../src/mat/utils/freespace.h>
4624 #include <petscbt.h>
4625 
4626 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4627 {
4628   MPI_Comm            comm;
4629   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4630   PetscMPIInt         size,rank,taga,*len_s;
4631   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4632   PetscInt            proc,m;
4633   PetscInt            **buf_ri,**buf_rj;
4634   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4635   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4636   MPI_Request         *s_waits,*r_waits;
4637   MPI_Status          *status;
4638   const MatScalar     *aa,*a_a;
4639   MatScalar           **abuf_r,*ba_i;
4640   Mat_Merge_SeqsToMPI *merge;
4641   PetscContainer      container;
4642 
4643   PetscFunctionBegin;
4644   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
4645   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
4646 
4647   PetscCallMPI(MPI_Comm_size(comm,&size));
4648   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4649 
4650   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
4651   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4652   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
4653   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4654   aa   = a_a;
4655 
4656   bi     = merge->bi;
4657   bj     = merge->bj;
4658   buf_ri = merge->buf_ri;
4659   buf_rj = merge->buf_rj;
4660 
4661   PetscCall(PetscMalloc1(size,&status));
4662   owners = merge->rowmap->range;
4663   len_s  = merge->len_s;
4664 
4665   /* send and recv matrix values */
4666   /*-----------------------------*/
4667   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
4668   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
4669 
4670   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
4671   for (proc=0,k=0; proc<size; proc++) {
4672     if (!len_s[proc]) continue;
4673     i    = owners[proc];
4674     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
4675     k++;
4676   }
4677 
4678   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
4679   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
4680   PetscCall(PetscFree(status));
4681 
4682   PetscCall(PetscFree(s_waits));
4683   PetscCall(PetscFree(r_waits));
4684 
4685   /* insert mat values of mpimat */
4686   /*----------------------------*/
4687   PetscCall(PetscMalloc1(N,&ba_i));
4688   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4689 
4690   for (k=0; k<merge->nrecv; k++) {
4691     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4692     nrows       = *(buf_ri_k[k]);
4693     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4694     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4695   }
4696 
4697   /* set values of ba */
4698   m    = merge->rowmap->n;
4699   for (i=0; i<m; i++) {
4700     arow = owners[rank] + i;
4701     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4702     bnzi = bi[i+1] - bi[i];
4703     PetscCall(PetscArrayzero(ba_i,bnzi));
4704 
4705     /* add local non-zero vals of this proc's seqmat into ba */
4706     anzi   = ai[arow+1] - ai[arow];
4707     aj     = a->j + ai[arow];
4708     aa     = a_a + ai[arow];
4709     nextaj = 0;
4710     for (j=0; nextaj<anzi; j++) {
4711       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4712         ba_i[j] += aa[nextaj++];
4713       }
4714     }
4715 
4716     /* add received vals into ba */
4717     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4718       /* i-th row */
4719       if (i == *nextrow[k]) {
4720         anzi   = *(nextai[k]+1) - *nextai[k];
4721         aj     = buf_rj[k] + *(nextai[k]);
4722         aa     = abuf_r[k] + *(nextai[k]);
4723         nextaj = 0;
4724         for (j=0; nextaj<anzi; j++) {
4725           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4726             ba_i[j] += aa[nextaj++];
4727           }
4728         }
4729         nextrow[k]++; nextai[k]++;
4730       }
4731     }
4732     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
4733   }
4734   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
4735   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
4736   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
4737 
4738   PetscCall(PetscFree(abuf_r[0]));
4739   PetscCall(PetscFree(abuf_r));
4740   PetscCall(PetscFree(ba_i));
4741   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4742   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
4743   PetscFunctionReturn(0);
4744 }
4745 
4746 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4747 {
4748   Mat                 B_mpi;
4749   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4750   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4751   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4752   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4753   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4754   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4755   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4756   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4757   MPI_Status          *status;
4758   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4759   PetscBT             lnkbt;
4760   Mat_Merge_SeqsToMPI *merge;
4761   PetscContainer      container;
4762 
4763   PetscFunctionBegin;
4764   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
4765 
4766   /* make sure it is a PETSc comm */
4767   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
4768   PetscCallMPI(MPI_Comm_size(comm,&size));
4769   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4770 
4771   PetscCall(PetscNew(&merge));
4772   PetscCall(PetscMalloc1(size,&status));
4773 
4774   /* determine row ownership */
4775   /*---------------------------------------------------------*/
4776   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
4777   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
4778   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
4779   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
4780   PetscCall(PetscLayoutSetUp(merge->rowmap));
4781   PetscCall(PetscMalloc1(size,&len_si));
4782   PetscCall(PetscMalloc1(size,&merge->len_s));
4783 
4784   m      = merge->rowmap->n;
4785   owners = merge->rowmap->range;
4786 
4787   /* determine the number of messages to send, their lengths */
4788   /*---------------------------------------------------------*/
4789   len_s = merge->len_s;
4790 
4791   len          = 0; /* length of buf_si[] */
4792   merge->nsend = 0;
4793   for (proc=0; proc<size; proc++) {
4794     len_si[proc] = 0;
4795     if (proc == rank) {
4796       len_s[proc] = 0;
4797     } else {
4798       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4799       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4800     }
4801     if (len_s[proc]) {
4802       merge->nsend++;
4803       nrows = 0;
4804       for (i=owners[proc]; i<owners[proc+1]; i++) {
4805         if (ai[i+1] > ai[i]) nrows++;
4806       }
4807       len_si[proc] = 2*(nrows+1);
4808       len         += len_si[proc];
4809     }
4810   }
4811 
4812   /* determine the number and length of messages to receive for ij-structure */
4813   /*-------------------------------------------------------------------------*/
4814   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
4815   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4816 
4817   /* post the Irecv of j-structure */
4818   /*-------------------------------*/
4819   PetscCall(PetscCommGetNewTag(comm,&tagj));
4820   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
4821 
4822   /* post the Isend of j-structure */
4823   /*--------------------------------*/
4824   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
4825 
4826   for (proc=0, k=0; proc<size; proc++) {
4827     if (!len_s[proc]) continue;
4828     i    = owners[proc];
4829     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
4830     k++;
4831   }
4832 
4833   /* receives and sends of j-structure are complete */
4834   /*------------------------------------------------*/
4835   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
4836   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
4837 
4838   /* send and recv i-structure */
4839   /*---------------------------*/
4840   PetscCall(PetscCommGetNewTag(comm,&tagi));
4841   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
4842 
4843   PetscCall(PetscMalloc1(len+1,&buf_s));
4844   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4845   for (proc=0,k=0; proc<size; proc++) {
4846     if (!len_s[proc]) continue;
4847     /* form outgoing message for i-structure:
4848          buf_si[0]:                 nrows to be sent
4849                [1:nrows]:           row index (global)
4850                [nrows+1:2*nrows+1]: i-structure index
4851     */
4852     /*-------------------------------------------*/
4853     nrows       = len_si[proc]/2 - 1;
4854     buf_si_i    = buf_si + nrows+1;
4855     buf_si[0]   = nrows;
4856     buf_si_i[0] = 0;
4857     nrows       = 0;
4858     for (i=owners[proc]; i<owners[proc+1]; i++) {
4859       anzi = ai[i+1] - ai[i];
4860       if (anzi) {
4861         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4862         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4863         nrows++;
4864       }
4865     }
4866     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
4867     k++;
4868     buf_si += len_si[proc];
4869   }
4870 
4871   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
4872   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
4873 
4874   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
4875   for (i=0; i<merge->nrecv; i++) {
4876     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
4877   }
4878 
4879   PetscCall(PetscFree(len_si));
4880   PetscCall(PetscFree(len_ri));
4881   PetscCall(PetscFree(rj_waits));
4882   PetscCall(PetscFree2(si_waits,sj_waits));
4883   PetscCall(PetscFree(ri_waits));
4884   PetscCall(PetscFree(buf_s));
4885   PetscCall(PetscFree(status));
4886 
4887   /* compute a local seq matrix in each processor */
4888   /*----------------------------------------------*/
4889   /* allocate bi array and free space for accumulating nonzero column info */
4890   PetscCall(PetscMalloc1(m+1,&bi));
4891   bi[0] = 0;
4892 
4893   /* create and initialize a linked list */
4894   nlnk = N+1;
4895   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
4896 
4897   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4898   len  = ai[owners[rank+1]] - ai[owners[rank]];
4899   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
4900 
4901   current_space = free_space;
4902 
4903   /* determine symbolic info for each local row */
4904   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4905 
4906   for (k=0; k<merge->nrecv; k++) {
4907     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4908     nrows       = *buf_ri_k[k];
4909     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4910     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4911   }
4912 
4913   MatPreallocateBegin(comm,m,n,dnz,onz);
4914   len  = 0;
4915   for (i=0; i<m; i++) {
4916     bnzi = 0;
4917     /* add local non-zero cols of this proc's seqmat into lnk */
4918     arow  = owners[rank] + i;
4919     anzi  = ai[arow+1] - ai[arow];
4920     aj    = a->j + ai[arow];
4921     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4922     bnzi += nlnk;
4923     /* add received col data into lnk */
4924     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4925       if (i == *nextrow[k]) { /* i-th row */
4926         anzi  = *(nextai[k]+1) - *nextai[k];
4927         aj    = buf_rj[k] + *nextai[k];
4928         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4929         bnzi += nlnk;
4930         nextrow[k]++; nextai[k]++;
4931       }
4932     }
4933     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4934 
4935     /* if free space is not available, make more free space */
4936     if (current_space->local_remaining<bnzi) {
4937       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
4938       nspacedouble++;
4939     }
4940     /* copy data into free space, then initialize lnk */
4941     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
4942     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4943 
4944     current_space->array           += bnzi;
4945     current_space->local_used      += bnzi;
4946     current_space->local_remaining -= bnzi;
4947 
4948     bi[i+1] = bi[i] + bnzi;
4949   }
4950 
4951   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4952 
4953   PetscCall(PetscMalloc1(bi[m]+1,&bj));
4954   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
4955   PetscCall(PetscLLDestroy(lnk,lnkbt));
4956 
4957   /* create symbolic parallel matrix B_mpi */
4958   /*---------------------------------------*/
4959   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
4960   PetscCall(MatCreate(comm,&B_mpi));
4961   if (n==PETSC_DECIDE) {
4962     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
4963   } else {
4964     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4965   }
4966   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
4967   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
4968   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4969   MatPreallocateEnd(dnz,onz);
4970   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
4971 
4972   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4973   B_mpi->assembled  = PETSC_FALSE;
4974   merge->bi         = bi;
4975   merge->bj         = bj;
4976   merge->buf_ri     = buf_ri;
4977   merge->buf_rj     = buf_rj;
4978   merge->coi        = NULL;
4979   merge->coj        = NULL;
4980   merge->owners_co  = NULL;
4981 
4982   PetscCall(PetscCommDestroy(&comm));
4983 
4984   /* attach the supporting struct to B_mpi for reuse */
4985   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
4986   PetscCall(PetscContainerSetPointer(container,merge));
4987   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
4988   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
4989   PetscCall(PetscContainerDestroy(&container));
4990   *mpimat = B_mpi;
4991 
4992   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 /*@C
4997       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4998                  matrices from each processor
4999 
5000     Collective
5001 
5002    Input Parameters:
5003 +    comm - the communicator the parallel matrix will live on
5004 .    seqmat - the input sequential matrix
5005 .    m - number of local rows (or PETSC_DECIDE)
5006 .    n - number of local columns (or PETSC_DECIDE)
5007 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5008 
5009    Output Parameter:
5010 .    mpimat - the parallel matrix generated
5011 
5012     Level: advanced
5013 
5014    Notes:
5015      The dimensions of the sequential matrix in each processor MUST be the same.
5016      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5017      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
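
   For illustration, a minimal sketch of typical usage (assuming seqmat is a MATSEQAIJ matrix of the same dimensions on every process of PETSC_COMM_WORLD):
.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve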
5018 @*/
5019 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5020 {
5021   PetscMPIInt    size;
5022 
5023   PetscFunctionBegin;
5024   PetscCallMPI(MPI_Comm_size(comm,&size));
5025   if (size == 1) {
5026     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5027     if (scall == MAT_INITIAL_MATRIX) {
5028       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
5029     } else {
5030       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
5031     }
5032     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5033     PetscFunctionReturn(0);
5034   }
5035   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5036   if (scall == MAT_INITIAL_MATRIX) {
5037     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
5038   }
5039   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
5040   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5041   PetscFunctionReturn(0);
5042 }
5043 
5044 /*@
5045      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5046           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5047           with MatGetSize()
5048 
5049     Not Collective
5050 
5051    Input Parameter:
5052 .    A - the matrix
5054 
5055    Output Parameter:
5056 .    A_loc - the local sequential matrix generated
5057 
5058     Level: developer
5059 
5060    Notes:
5061      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5062 
5063      Destroy the matrix with MatDestroy()
5064 
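     For illustration, a minimal sketch of typical usage (A may be of type MATSEQAIJ or MATMPIAIJ):
.vb
     Mat A_loc;
     MatAIJGetLocalMat(A,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
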
5065 .seealso: `MatMPIAIJGetLocalMat()`
5066 
5067 @*/
5068 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5069 {
5070   PetscBool      mpi;
5071 
5072   PetscFunctionBegin;
5073   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5074   if (mpi) {
5075     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5076   } else {
5077     *A_loc = A;
5078     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5079   }
5080   PetscFunctionReturn(0);
5081 }
5082 
5083 /*@
5084      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5085           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5086           with MatGetSize()
5087 
5088     Not Collective
5089 
5090    Input Parameters:
5091 +    A - the matrix
5092 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5093 
5094    Output Parameter:
5095 .    A_loc - the local sequential matrix generated
5096 
5097     Level: developer
5098 
5099    Notes:
5100      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5101 
5102      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5103      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5104      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5105      modify the values of the returned A_loc.
5106 
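     For illustration, a minimal sketch of typical usage (the second call reuses A_loc to pick up new values after A has been modified):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... A is modified ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
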
5107 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5108 @*/
5109 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5110 {
5111   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5112   Mat_SeqAIJ        *mat,*a,*b;
5113   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5114   const PetscScalar *aa,*ba,*aav,*bav;
5115   PetscScalar       *ca,*cam;
5116   PetscMPIInt       size;
5117   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5118   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5119   PetscBool         match;
5120 
5121   PetscFunctionBegin;
5122   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
5123   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5124   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5125   if (size == 1) {
5126     if (scall == MAT_INITIAL_MATRIX) {
5127       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5128       *A_loc = mpimat->A;
5129     } else if (scall == MAT_REUSE_MATRIX) {
5130       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
5131     }
5132     PetscFunctionReturn(0);
5133   }
5134 
5135   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5136   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5137   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5138   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5139   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
5140   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5141   aa   = aav;
5142   ba   = bav;
5143   if (scall == MAT_INITIAL_MATRIX) {
5144     PetscCall(PetscMalloc1(1+am,&ci));
5145     ci[0] = 0;
5146     for (i=0; i<am; i++) {
5147       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5148     }
5149     PetscCall(PetscMalloc1(1+ci[am],&cj));
5150     PetscCall(PetscMalloc1(1+ci[am],&ca));
5151     k    = 0;
5152     for (i=0; i<am; i++) {
5153       ncols_o = bi[i+1] - bi[i];
5154       ncols_d = ai[i+1] - ai[i];
5155       /* off-diagonal portion of A */
5156       for (jo=0; jo<ncols_o; jo++) {
5157         col = cmap[*bj];
5158         if (col >= cstart) break;
5159         cj[k]   = col; bj++;
5160         ca[k++] = *ba++;
5161       }
5162       /* diagonal portion of A */
5163       for (j=0; j<ncols_d; j++) {
5164         cj[k]   = cstart + *aj++;
5165         ca[k++] = *aa++;
5166       }
5167       /* off-diagonal portion of A */
5168       for (j=jo; j<ncols_o; j++) {
5169         cj[k]   = cmap[*bj++];
5170         ca[k++] = *ba++;
5171       }
5172     }
5173     /* put together the new matrix */
5174     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5175     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5176     /* Since these are PETSc arrays, change flags to free them as necessary. */
5177     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5178     mat->free_a  = PETSC_TRUE;
5179     mat->free_ij = PETSC_TRUE;
5180     mat->nonew   = 0;
5181   } else if (scall == MAT_REUSE_MATRIX) {
5182     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5183     ci   = mat->i;
5184     cj   = mat->j;
5185     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
5186     for (i=0; i<am; i++) {
5187       /* off-diagonal portion of A */
5188       ncols_o = bi[i+1] - bi[i];
5189       for (jo=0; jo<ncols_o; jo++) {
5190         col = cmap[*bj];
5191         if (col >= cstart) break;
5192         *cam++ = *ba++; bj++;
5193       }
5194       /* diagonal portion of A */
5195       ncols_d = ai[i+1] - ai[i];
5196       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5197       /* off-diagonal portion of A */
5198       for (j=jo; j<ncols_o; j++) {
5199         *cam++ = *ba++; bj++;
5200       }
5201     }
5202     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
5203   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5204   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
5205   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
5206   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5207   PetscFunctionReturn(0);
5208 }
5209 
5210 /*@
5211      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5212           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5213 
5214     Not Collective
5215 
5216    Input Parameters:
5217 +    A - the matrix
5218 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5219 
5220    Output Parameters:
5221 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5222 -    A_loc - the local sequential matrix generated
5223 
5224     Level: developer
5225 
5226    Notes:
5227      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5228 
5229 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5230 
5231 @*/
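/* A minimal usage sketch (illustration only, not part of this file); the returned IS maps the columns of the
   merged local matrix (diagonal-part columns first, then off-diagonal ones) back to global column indices,
   assuming A is an assembled MATMPIAIJ matrix:

     Mat A_loc;
     IS  glob;

     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc));
     ... use A_loc and glob ...
     PetscCall(ISDestroy(&glob));
     PetscCall(MatDestroy(&A_loc));
*/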
5232 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5233 {
5234   Mat            Ao,Ad;
5235   const PetscInt *cmap;
5236   PetscMPIInt    size;
5237   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5238 
5239   PetscFunctionBegin;
5240   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5241   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5242   if (size == 1) {
5243     if (scall == MAT_INITIAL_MATRIX) {
5244       PetscCall(PetscObjectReference((PetscObject)Ad));
5245       *A_loc = Ad;
5246     } else if (scall == MAT_REUSE_MATRIX) {
5247       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5248     }
5249     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5250     PetscFunctionReturn(0);
5251   }
5252   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5253   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5254   if (f) {
5255     PetscCall((*f)(A,scall,glob,A_loc));
5256   } else {
5257     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5258     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5259     Mat_SeqAIJ        *c;
5260     PetscInt          *ai = a->i, *aj = a->j;
5261     PetscInt          *bi = b->i, *bj = b->j;
5262     PetscInt          *ci,*cj;
5263     const PetscScalar *aa,*ba;
5264     PetscScalar       *ca;
5265     PetscInt          i,j,am,dn,on;
5266 
5267     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5268     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5269     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5270     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5271     if (scall == MAT_INITIAL_MATRIX) {
5272       PetscInt k;
5273       PetscCall(PetscMalloc1(1+am,&ci));
5274       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5275       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5276       ci[0] = 0;
5277       for (i=0,k=0; i<am; i++) {
5278         const PetscInt ncols_o = bi[i+1] - bi[i];
5279         const PetscInt ncols_d = ai[i+1] - ai[i];
5280         ci[i+1] = ci[i] + ncols_o + ncols_d;
5281         /* diagonal portion of A */
5282         for (j=0; j<ncols_d; j++,k++) {
5283           cj[k] = *aj++;
5284           ca[k] = *aa++;
5285         }
5286         /* off-diagonal portion of A */
5287         for (j=0; j<ncols_o; j++,k++) {
5288           cj[k] = dn + *bj++;
5289           ca[k] = *ba++;
5290         }
5291       }
5292       /* put together the new matrix */
5293       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5294       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5295       /* Since these are PETSc arrays, change flags to free them as necessary. */
5296       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5297       c->free_a  = PETSC_TRUE;
5298       c->free_ij = PETSC_TRUE;
5299       c->nonew   = 0;
5300       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5301     } else if (scall == MAT_REUSE_MATRIX) {
5302       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5303       for (i=0; i<am; i++) {
5304         const PetscInt ncols_d = ai[i+1] - ai[i];
5305         const PetscInt ncols_o = bi[i+1] - bi[i];
5306         /* diagonal portion of A */
5307         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5308         /* off-diagonal portion of A */
5309         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5310       }
5311       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5312     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5313     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5314     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5315     if (glob) {
5316       PetscInt cst, *gidx;
5317 
5318       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5319       PetscCall(PetscMalloc1(dn+on,&gidx));
5320       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5321       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5322       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5323     }
5324   }
5325   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5326   PetscFunctionReturn(0);
5327 }
5328 
5329 /*@C
5330      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5331 
5332     Not Collective
5333 
5334    Input Parameters:
5335 +    A - the matrix
5336 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5337 -    row, col - index sets of rows and columns to extract (or NULL)
5338 
5339    Output Parameter:
5340 .    A_loc - the local sequential matrix generated
5341 
5342     Level: developer
5343 
5344 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5345 
5346 @*/
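/* A minimal usage sketch (illustration only, not part of this file); passing NULL for row and col keeps all
   local rows and uses the condensed column set described above, assuming A is an assembled MATMPIAIJ matrix:

     Mat A_loc;

     PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc));
     ... use A_loc ...
     PetscCall(MatDestroy(&A_loc));
*/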
5347 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5348 {
5349   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5350   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5351   IS             isrowa,iscola;
5352   Mat            *aloc;
5353   PetscBool      match;
5354 
5355   PetscFunctionBegin;
5356   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5357   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5358   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5359   if (!row) {
5360     start = A->rmap->rstart; end = A->rmap->rend;
5361     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5362   } else {
5363     isrowa = *row;
5364   }
5365   if (!col) {
5366     start = A->cmap->rstart;
5367     cmap  = a->garray;
5368     nzA   = a->A->cmap->n;
5369     nzB   = a->B->cmap->n;
5370     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5371     ncols = 0;
5372     for (i=0; i<nzB; i++) {
5373       if (cmap[i] < start) idx[ncols++] = cmap[i];
5374       else break;
5375     }
5376     imark = i;
5377     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5378     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5379     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5380   } else {
5381     iscola = *col;
5382   }
5383   if (scall != MAT_INITIAL_MATRIX) {
5384     PetscCall(PetscMalloc1(1,&aloc));
5385     aloc[0] = *A_loc;
5386   }
5387   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5388   if (!col) { /* attach global id of condensed columns */
5389     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5390   }
5391   *A_loc = aloc[0];
5392   PetscCall(PetscFree(aloc));
5393   if (!row) {
5394     PetscCall(ISDestroy(&isrowa));
5395   }
5396   if (!col) {
5397     PetscCall(ISDestroy(&iscola));
5398   }
5399   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5400   PetscFunctionReturn(0);
5401 }
5402 
5403 /*
5404  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once that row is matched.
5405  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5406  * on a global size.
5407  * */
5408 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5409 {
5410   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5411   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5412   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5413   PetscMPIInt              owner;
5414   PetscSFNode              *iremote,*oiremote;
5415   const PetscInt           *lrowindices;
5416   PetscSF                  sf,osf;
5417   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5418   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5419   MPI_Comm                 comm;
5420   ISLocalToGlobalMapping   mapping;
5421   const PetscScalar        *pd_a,*po_a;
5422 
5423   PetscFunctionBegin;
5424   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
5425   /* plocalsize is the number of roots
5426    * nrows is the number of leaves
5427    * */
5428   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
5429   PetscCall(ISGetLocalSize(rows,&nrows));
5430   PetscCall(PetscCalloc1(nrows,&iremote));
5431   PetscCall(ISGetIndices(rows,&lrowindices));
5432   for (i=0;i<nrows;i++) {
5433     /* Find a remote index and an owner for a row
5434      * The row could be local or remote
5435      * */
5436     owner = 0;
5437     lidx  = 0;
5438     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
5439     iremote[i].index = lidx;
5440     iremote[i].rank  = owner;
5441   }
5442   /* Create SF to communicate how many nonzero columns for each row */
5443   PetscCall(PetscSFCreate(comm,&sf));
5444   /* SF will figure out the number of nonzero columns for each row, and their
5445    * offsets
5446    * */
5447   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5448   PetscCall(PetscSFSetFromOptions(sf));
5449   PetscCall(PetscSFSetUp(sf));
5450 
5451   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
5452   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
5453   PetscCall(PetscCalloc1(nrows,&pnnz));
5454   roffsets[0] = 0;
5455   roffsets[1] = 0;
5456   for (i=0;i<plocalsize;i++) {
5457     /* diag */
5458     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5459     /* off diag */
5460     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5461     /* compute offsets so that we know the relative location of each row */
5462     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5463     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5464   }
5465   PetscCall(PetscCalloc1(2*nrows,&nlcols));
5466   PetscCall(PetscCalloc1(2*nrows,&loffsets));
5467   /* 'r' means root, and 'l' means leaf */
5468   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5469   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5470   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5471   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5472   PetscCall(PetscSFDestroy(&sf));
5473   PetscCall(PetscFree(roffsets));
5474   PetscCall(PetscFree(nrcols));
5475   dntotalcols = 0;
5476   ontotalcols = 0;
5477   ncol = 0;
5478   for (i=0;i<nrows;i++) {
5479     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5480     ncol = PetscMax(pnnz[i],ncol);
5481     /* diag */
5482     dntotalcols += nlcols[i*2+0];
5483     /* off diag */
5484     ontotalcols += nlcols[i*2+1];
5485   }
5486   /* We do not need to figure out the right number of columns
5487    * since all the calculations will be done by going through the raw data
5488    * */
5489   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
5490   PetscCall(MatSetUp(*P_oth));
5491   PetscCall(PetscFree(pnnz));
5492   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5493   /* diag */
5494   PetscCall(PetscCalloc1(dntotalcols,&iremote));
5495   /* off diag */
5496   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
5497   /* diag */
5498   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
5499   /* off diag */
5500   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
5501   dntotalcols = 0;
5502   ontotalcols = 0;
5503   ntotalcols  = 0;
5504   for (i=0;i<nrows;i++) {
5505     owner = 0;
5506     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
5507     /* Set iremote for diag matrix */
5508     for (j=0;j<nlcols[i*2+0];j++) {
5509       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5510       iremote[dntotalcols].rank    = owner;
5511       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5512       ilocal[dntotalcols++]        = ntotalcols++;
5513     }
5514     /* off diag */
5515     for (j=0;j<nlcols[i*2+1];j++) {
5516       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5517       oiremote[ontotalcols].rank    = owner;
5518       oilocal[ontotalcols++]        = ntotalcols++;
5519     }
5520   }
5521   PetscCall(ISRestoreIndices(rows,&lrowindices));
5522   PetscCall(PetscFree(loffsets));
5523   PetscCall(PetscFree(nlcols));
5524   PetscCall(PetscSFCreate(comm,&sf));
5525   /* P serves as roots and P_oth is leaves
5526    * Diag matrix
5527    * */
5528   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5529   PetscCall(PetscSFSetFromOptions(sf));
5530   PetscCall(PetscSFSetUp(sf));
5531 
5532   PetscCall(PetscSFCreate(comm,&osf));
5533   /* Off diag */
5534   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
5535   PetscCall(PetscSFSetFromOptions(osf));
5536   PetscCall(PetscSFSetUp(osf));
5537   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5538   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5539   /* We operate on the matrix internal data for saving memory */
5540   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5541   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5542   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
5543   /* Convert to global indices for diag matrix */
5544   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5545   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5546   /* We want P_oth to store global indices */
5547   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
5548   /* Use memory scalable approach */
5549   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
5550   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
5551   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5552   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5553   /* Convert back to local indices */
5554   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5555   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5556   nout = 0;
5557   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
5558   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5559   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5560   /* Exchange values */
5561   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5562   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5563   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5564   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5565   /* Stop PETSc from shrinking memory */
5566   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5567   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
5568   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
5569   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5570   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
5571   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
5572   PetscCall(PetscSFDestroy(&sf));
5573   PetscCall(PetscSFDestroy(&osf));
5574   PetscFunctionReturn(0);
5575 }
5576 
5577 /*
5578  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5579  * This supports MPIAIJ and MAIJ
5580  * */
5581 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5582 {
5583   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5584   Mat_SeqAIJ            *p_oth;
5585   IS                    rows,map;
5586   PetscHMapI            hamp;
5587   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5588   MPI_Comm              comm;
5589   PetscSF               sf,osf;
5590   PetscBool             has;
5591 
5592   PetscFunctionBegin;
5593   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5594   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
5595   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5596    *  and then create a submatrix (that often is an overlapping matrix)
5597    * */
5598   if (reuse == MAT_INITIAL_MATRIX) {
5599     /* Use a hash table to figure out unique keys */
5600     PetscCall(PetscHMapICreate(&hamp));
5601     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
5602     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5603     count = 0;
5604     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5605     for (i=0;i<a->B->cmap->n;i++) {
5606       key  = a->garray[i]/dof;
5607       PetscCall(PetscHMapIHas(hamp,key,&has));
5608       if (!has) {
5609         mapping[i] = count;
5610         PetscCall(PetscHMapISet(hamp,key,count++));
5611       } else {
5612         /* Current 'i' has the same value as in the previous step */
5613         mapping[i] = count-1;
5614       }
5615     }
5616     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
5617     PetscCall(PetscHMapIGetSize(hamp,&htsize));
5618     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5619     PetscCall(PetscCalloc1(htsize,&rowindices));
5620     off = 0;
5621     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
5622     PetscCall(PetscHMapIDestroy(&hamp));
5623     PetscCall(PetscSortInt(htsize,rowindices));
5624     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
5625     /* In case the matrix was already created but the user wants to recreate it */
5626     PetscCall(MatDestroy(P_oth));
5627     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
5628     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
5629     PetscCall(ISDestroy(&map));
5630     PetscCall(ISDestroy(&rows));
5631   } else if (reuse == MAT_REUSE_MATRIX) {
5632     /* If the matrix was already created, we simply update values using the SF objects
5633      * that were attached to the matrix earlier.
5634      */
5635     const PetscScalar *pd_a,*po_a;
5636 
5637     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
5638     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
5639     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5640     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5641     /* Update values in place */
5642     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5643     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5644     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5645     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5646     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5647     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5648     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5649     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5650   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5651   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
5652   PetscFunctionReturn(0);
5653 }
5654 
5655 /*@C
5656   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5657 
5658   Collective on Mat
5659 
5660   Input Parameters:
5661 + A - the first matrix in mpiaij format
5662 . B - the second matrix in mpiaij format
5663 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5664 
5665   Output Parameters:
5666 + rowb - On input, index set of rows of B to extract (or NULL); modified on output
5667 . colb - On input, index set of columns of B to extract (or NULL); modified on output
5668 - B_seq - the sequential matrix generated
5669 
5670   Level: developer
5671 
5672 @*/
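/* A minimal usage sketch (illustration only, not part of this file); the index sets created on the first call
   are returned to the caller and passed back on reuse, assuming A and B are MATMPIAIJ matrices with compatible
   local layouts:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;

     PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq));
     ... use B_seq; later, after the values of B change (same nonzero pattern) ...
     PetscCall(MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq));
     PetscCall(ISDestroy(&rowb));
     PetscCall(ISDestroy(&colb));
     PetscCall(MatDestroy(&B_seq));
*/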
5673 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5674 {
5675   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5676   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5677   IS             isrowb,iscolb;
5678   Mat            *bseq=NULL;
5679 
5680   PetscFunctionBegin;
5681   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5682     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5683   }
5684   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5685 
5686   if (scall == MAT_INITIAL_MATRIX) {
5687     start = A->cmap->rstart;
5688     cmap  = a->garray;
5689     nzA   = a->A->cmap->n;
5690     nzB   = a->B->cmap->n;
5691     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5692     ncols = 0;
5693     for (i=0; i<nzB; i++) {  /* row < local row index */
5694       if (cmap[i] < start) idx[ncols++] = cmap[i];
5695       else break;
5696     }
5697     imark = i;
5698     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5699     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5700     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5701     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5702   } else {
5703     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5704     isrowb  = *rowb; iscolb = *colb;
5705     PetscCall(PetscMalloc1(1,&bseq));
5706     bseq[0] = *B_seq;
5707   }
5708   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5709   *B_seq = bseq[0];
5710   PetscCall(PetscFree(bseq));
5711   if (!rowb) {
5712     PetscCall(ISDestroy(&isrowb));
5713   } else {
5714     *rowb = isrowb;
5715   }
5716   if (!colb) {
5717     PetscCall(ISDestroy(&iscolb));
5718   } else {
5719     *colb = iscolb;
5720   }
5721   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5722   PetscFunctionReturn(0);
5723 }
5724 
5725 /*
5726     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5727     of the OFF-DIAGONAL portion of local A
5728 
5729     Collective on Mat
5730 
5731    Input Parameters:
5732 +    A,B - the matrices in mpiaij format
5733 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5734 
5735    Output Parameters:
5736 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5737 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5738 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5739 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5740 
5741     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5742      for this matrix. This is not desirable.
5743 
5744     Level: developer
5745 
5746 */
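/* A usage sketch (illustration only, not part of this file); the offset and buffer arrays returned on the first
   call are handed back on reuse so the communication pattern is set up only once. This is roughly the pattern
   used by the parallel matrix-matrix product routines:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;

     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth));
     ... use B_oth; later, after the values of B change (same nonzero pattern) ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth));
     PetscCall(PetscFree2(startsj_s,startsj_r));
     PetscCall(PetscFree(bufa));
     PetscCall(MatDestroy(&B_oth));
*/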
5747 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5748 {
5749   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5750   Mat_SeqAIJ             *b_oth;
5751   VecScatter             ctx;
5752   MPI_Comm               comm;
5753   const PetscMPIInt      *rprocs,*sprocs;
5754   const PetscInt         *srow,*rstarts,*sstarts;
5755   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5756   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5757   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5758   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5759   PetscMPIInt            size,tag,rank,nreqs;
5760 
5761   PetscFunctionBegin;
5762   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5763   PetscCallMPI(MPI_Comm_size(comm,&size));
5764 
5765   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5766     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5767   }
5768   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5769   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5770 
5771   if (size == 1) {
5772     if (startsj_s) *startsj_s = NULL;
5773     if (bufa_ptr)  *bufa_ptr  = NULL;
5774     *B_oth    = NULL;
5775     PetscFunctionReturn(0);
5776   }
5777 
5778   ctx = a->Mvctx;
5779   tag = ((PetscObject)ctx)->tag;
5780 
5781   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5782   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5783   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5784   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5785   PetscCall(PetscMalloc1(nreqs,&reqs));
5786   rwaits = reqs;
5787   swaits = reqs + nrecvs;
5788 
5789   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5790   if (scall == MAT_INITIAL_MATRIX) {
5791     /* i-array */
5792     /*---------*/
5793     /*  post receives */
5794     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5795     for (i=0; i<nrecvs; i++) {
5796       rowlen = rvalues + rstarts[i]*rbs;
5797       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5798       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5799     }
5800 
5801     /* pack the outgoing message */
5802     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5803 
5804     sstartsj[0] = 0;
5805     rstartsj[0] = 0;
5806     len         = 0; /* total length of j or a array to be sent */
5807     if (nsends) {
5808       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5809       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5810     }
5811     for (i=0; i<nsends; i++) {
5812       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5813       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5814       for (j=0; j<nrows; j++) {
5815         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5816         for (l=0; l<sbs; l++) {
5817           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5818 
5819           rowlen[j*sbs+l] = ncols;
5820 
5821           len += ncols;
5822           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5823         }
5824         k++;
5825       }
5826       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5827 
5828       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5829     }
5830     /* recvs and sends of i-array are completed */
5831     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5832     PetscCall(PetscFree(svalues));
5833 
5834     /* allocate buffers for sending j and a arrays */
5835     PetscCall(PetscMalloc1(len+1,&bufj));
5836     PetscCall(PetscMalloc1(len+1,&bufa));
5837 
5838     /* create i-array of B_oth */
5839     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5840 
5841     b_othi[0] = 0;
5842     len       = 0; /* total length of j or a array to be received */
5843     k         = 0;
5844     for (i=0; i<nrecvs; i++) {
5845       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5846       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5847       for (j=0; j<nrows; j++) {
5848         b_othi[k+1] = b_othi[k] + rowlen[j];
5849         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5850         k++;
5851       }
5852       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5853     }
5854     PetscCall(PetscFree(rvalues));
5855 
5856     /* allocate space for j and a arrays of B_oth */
5857     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5858     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5859 
5860     /* j-array */
5861     /*---------*/
5862     /*  post receives of j-array */
5863     for (i=0; i<nrecvs; i++) {
5864       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5865       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5866     }
5867 
5868     /* pack the outgoing message j-array */
5869     if (nsends) k = sstarts[0];
5870     for (i=0; i<nsends; i++) {
5871       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5872       bufJ  = bufj+sstartsj[i];
5873       for (j=0; j<nrows; j++) {
5874         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5875         for (ll=0; ll<sbs; ll++) {
5876           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5877           for (l=0; l<ncols; l++) {
5878             *bufJ++ = cols[l];
5879           }
5880           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5881         }
5882       }
5883       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5884     }
5885 
5886     /* recvs and sends of j-array are completed */
5887     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5888   } else if (scall == MAT_REUSE_MATRIX) {
5889     sstartsj = *startsj_s;
5890     rstartsj = *startsj_r;
5891     bufa     = *bufa_ptr;
5892     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5893     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5894   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5895 
5896   /* a-array */
5897   /*---------*/
5898   /*  post receives of a-array */
5899   for (i=0; i<nrecvs; i++) {
5900     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5901     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5902   }
5903 
5904   /* pack the outgoing message a-array */
5905   if (nsends) k = sstarts[0];
5906   for (i=0; i<nsends; i++) {
5907     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5908     bufA  = bufa+sstartsj[i];
5909     for (j=0; j<nrows; j++) {
5910       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5911       for (ll=0; ll<sbs; ll++) {
5912         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5913         for (l=0; l<ncols; l++) {
5914           *bufA++ = vals[l];
5915         }
5916         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5917       }
5918     }
5919     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5920   }
5921   /* recvs and sends of a-array are completed */
5922   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5923   PetscCall(PetscFree(reqs));
5924 
5925   if (scall == MAT_INITIAL_MATRIX) {
5926     /* put together the new matrix */
5927     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5928 
5929     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5930     /* Since these are PETSc arrays, change flags to free them as necessary. */
5931     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5932     b_oth->free_a  = PETSC_TRUE;
5933     b_oth->free_ij = PETSC_TRUE;
5934     b_oth->nonew   = 0;
5935 
5936     PetscCall(PetscFree(bufj));
5937     if (!startsj_s || !bufa_ptr) {
5938       PetscCall(PetscFree2(sstartsj,rstartsj));
5939       PetscCall(PetscFree(bufa_ptr));
5940     } else {
5941       *startsj_s = sstartsj;
5942       *startsj_r = rstartsj;
5943       *bufa_ptr  = bufa;
5944     }
5945   } else if (scall == MAT_REUSE_MATRIX) {
5946     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5947   }
5948 
5949   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5950   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5951   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5952   PetscFunctionReturn(0);
5953 }
5954 
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5958 #if defined(PETSC_HAVE_MKL_SPARSE)
5959 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5960 #endif
5961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5963 #if defined(PETSC_HAVE_ELEMENTAL)
5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5965 #endif
5966 #if defined(PETSC_HAVE_SCALAPACK)
5967 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5968 #endif
5969 #if defined(PETSC_HAVE_HYPRE)
5970 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5971 #endif
5972 #if defined(PETSC_HAVE_CUDA)
5973 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5974 #endif
5975 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5976 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5977 #endif
5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5979 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5980 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5981 
5982 /*
5983     Computes (B'*A')' since computing B*A directly is untenable
5984 
5985                n                       p                          p
5986         [             ]       [             ]         [                 ]
5987       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5988         [             ]       [             ]         [                 ]
5989 
5990 */
5991 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5992 {
5993   Mat            At,Bt,Ct;
5994 
5995   PetscFunctionBegin;
5996   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5997   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5998   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5999   PetscCall(MatDestroy(&At));
6000   PetscCall(MatDestroy(&Bt));
6001   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
6002   PetscCall(MatDestroy(&Ct));
6003   PetscFunctionReturn(0);
6004 }
6005 
6006 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6007 {
6008   PetscBool      cisdense;
6009 
6010   PetscFunctionBegin;
6011   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6012   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
6013   PetscCall(MatSetBlockSizesFromMats(C,A,B));
6014   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
6015   if (!cisdense) {
6016     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6017   }
6018   PetscCall(MatSetUp(C));
6019 
6020   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6021   PetscFunctionReturn(0);
6022 }
6023 
6024 /* ----------------------------------------------------------------*/
6025 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6026 {
6027   Mat_Product *product = C->product;
6028   Mat         A = product->A,B=product->B;
6029 
6030   PetscFunctionBegin;
6031   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6032     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6033 
6034   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6035   C->ops->productsymbolic = MatProductSymbolic_AB;
6036   PetscFunctionReturn(0);
6037 }
6038 
6039 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6040 {
6041   Mat_Product    *product = C->product;
6042 
6043   PetscFunctionBegin;
6044   if (product->type == MATPRODUCT_AB) {
6045     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6046   }
6047   PetscFunctionReturn(0);
6048 }
6049 
6050 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6051 
6052   Input Parameters:
6053 
6054     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6055     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6056 
6057     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6058 
6059     For Set1, j1[] contains column indices of the nonzeros.
6060     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6061     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6062     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6063 
6064     Similar for Set2.
6065 
6066     This routine merges the two sets of nonzeros row by row and removes repeats.
6067 
6068   Output Parameters: (memory is allocated by the caller)
6069 
6070     i[],j[]: the CSR of the merged matrix, which has m rows.
6071     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6072     imap2[]: similar to imap1[], but for Set2.
6073     Note we order nonzeros row-by-row and from left to right.
6074 */
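/* An illustrative example (not taken from a test), with a single local row:
     Set1: j1 = [1,1,3], rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]   (unique nonzeros: columns 1 and 3)
     Set2: j2 = [2,3,3], rowBegin2 = [0], rowEnd2 = [3], jmap2 = [0,1,3]   (unique nonzeros: columns 2 and 3)
   The merged CSR is i = [0,3], j = [1,2,3], with imap1 = [0,2] and imap2 = [1,2].
*/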
6075 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6076   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6077   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6078 {
6079   PetscInt       r,m; /* Row index of mat */
6080   PetscCount     t,t1,t2,b1,e1,b2,e2;
6081 
6082   PetscFunctionBegin;
6083   PetscCall(MatGetLocalSize(mat,&m,NULL));
6084   t1   = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6085   i[0] = 0;
6086   for (r=0; r<m; r++) { /* Do row by row merging */
6087     b1   = rowBegin1[r];
6088     e1   = rowEnd1[r];
6089     b2   = rowBegin2[r];
6090     e2   = rowEnd2[r];
6091     while (b1 < e1 && b2 < e2) {
6092       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6093         j[t]      = j1[b1];
6094         imap1[t1] = t;
6095         imap2[t2] = t;
6096         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6097         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6098         t1++; t2++; t++;
6099       } else if (j1[b1] < j2[b2]) {
6100         j[t]      = j1[b1];
6101         imap1[t1] = t;
6102         b1       += jmap1[t1+1] - jmap1[t1];
6103         t1++; t++;
6104       } else {
6105         j[t]      = j2[b2];
6106         imap2[t2] = t;
6107         b2       += jmap2[t2+1] - jmap2[t2];
6108         t2++; t++;
6109       }
6110     }
6111     /* Merge the remaining in either j1[] or j2[] */
6112     while (b1 < e1) {
6113       j[t]      = j1[b1];
6114       imap1[t1] = t;
6115       b1       += jmap1[t1+1] - jmap1[t1];
6116       t1++; t++;
6117     }
6118     while (b2 < e2) {
6119       j[t]      = j2[b2];
6120       imap2[t2] = t;
6121       b2       += jmap2[t2+1] - jmap2[t2];
6122       t2++; t++;
6123     }
6124     i[r+1] = t;
6125   }
6126   PetscFunctionReturn(0);
6127 }
6128 
6129 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6130 
6131   Input Parameters:
6132     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6133     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6134       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6135 
6136       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6137       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6138 
6139   Output Parameters:
6140     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6141     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6142       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6143       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6144 
6145     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6146       Atot: number of entries belonging to the diagonal block.
6147       Annz: number of unique nonzeros belonging to the diagonal block.
6148       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. The length of Aperm[] is Atot, which counts
6149         repeats (i.e., the same 'i,j' pair may appear multiple times).
6150       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6151         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6155 
6156     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6157 
6158     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6159 */
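/* An illustrative example (not taken from a test): suppose the local column range is [cstart,cend) = [10,20)
   and one local row has (unsorted) column indices j = [25,12,3,12]. After the split, the diagonal block owns
   the two entries with column 12 and the off-diagonal block owns columns 3 and 25, so for that row
     rowBegin = 0, rowMid = 2, rowEnd = 4,
     Atot = 2, Annz = 1, Ajmap = [0,2],
     Btot = 2, Bnnz = 2, Bjmap = [0,1,2].
*/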
6160 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6161   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6162   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6163   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6164 {
6165   PetscInt          cstart,cend,rstart,rend,row,col;
6166   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6167   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6168   PetscCount        k,m,p,q,r,s,mid;
6169   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6170 
6171   PetscFunctionBegin;
6172   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6173   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6174   m    = rend - rstart;
6175 
6176   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6177 
6178   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6179      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6180   */
6181   while (k<n) {
6182     row = i[k];
6183     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6184     for (s=k; s<n; s++) if (i[s] != row) break;
6185     for (p=k; p<s; p++) {
6186       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6187       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6188     }
6189     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6190     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6191     rowBegin[row-rstart] = k;
6192     rowMid[row-rstart]   = mid;
6193     rowEnd[row-rstart]   = s;
6194 
6195     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6196     Atot += mid - k;
6197     Btot += s - mid;
6198 
6199     /* Count unique nonzeros of this diag/offdiag row */
6200     for (p=k; p<mid;) {
6201       col = j[p];
6202       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6203       Annz++;
6204     }
6205 
6206     for (p=mid; p<s;) {
6207       col = j[p];
6208       do {p++;} while (p<s && j[p] == col);
6209       Bnnz++;
6210     }
6211     k = s;
6212   }
6213 
6214   /* Allocation according to Atot, Btot, Annz, Bnnz */
6215   PetscCall(PetscMalloc1(Atot,&Aperm));
6216   PetscCall(PetscMalloc1(Btot,&Bperm));
6217   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6218   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6219 
6220   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6221   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6222   for (r=0; r<m; r++) {
6223     k     = rowBegin[r];
6224     mid   = rowMid[r];
6225     s     = rowEnd[r];
6226     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6227     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6228     Atot += mid - k;
6229     Btot += s - mid;
6230 
6231     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6232     for (p=k; p<mid;) {
6233       col = j[p];
6234       q   = p;
6235       do {p++;} while (p<mid && j[p] == col);
6236       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6237       Annz++;
6238     }
6239 
6240     for (p=mid; p<s;) {
6241       col = j[p];
6242       q   = p;
6243       do {p++;} while (p<s && j[p] == col);
6244       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6245       Bnnz++;
6246     }
6247   }
6248   /* Output */
6249   *Aperm_ = Aperm;
6250   *Annz_  = Annz;
6251   *Atot_  = Atot;
6252   *Ajmap_ = Ajmap;
6253   *Bperm_ = Bperm;
6254   *Bnnz_  = Bnnz;
6255   *Btot_  = Btot;
6256   *Bjmap_ = Bjmap;
6257   PetscFunctionReturn(0);
6258 }
6259 
6260 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6261 
6262   Input Parameters:
6263     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6264     nnz:  number of unique nonzeros in the merged matrix
6265     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6266     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6267 
6268   Output Parameter: (memory is allocated by the caller)
6269     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6270 
6271   Example:
6272     nnz1 = 4
6273     nnz  = 6
6274     imap = [1,3,4,5]
6275     jmap = [0,3,5,6,7]
6276    then,
6277     jmap_new = [0,0,3,3,5,6,7]
6278 */
6279 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6280 {
6281   PetscCount k,p;
6282 
6283   PetscFunctionBegin;
6284   jmap_new[0] = 0;
6285   p = nnz; /* p loops over jmap_new[] backwards */
6286   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6287     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6288   }
6289   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6290   PetscFunctionReturn(0);
6291 }
6292 
6293 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6294 {
6295   MPI_Comm                  comm;
6296   PetscMPIInt               rank,size;
6297   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6298   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6299   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6300 
6301   PetscFunctionBegin;
6302   PetscCall(PetscFree(mpiaij->garray));
6303   PetscCall(VecDestroy(&mpiaij->lvec));
6304 #if defined(PETSC_USE_CTABLE)
6305   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6306 #else
6307   PetscCall(PetscFree(mpiaij->colmap));
6308 #endif
6309   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6310   mat->assembled = PETSC_FALSE;
6311   mat->was_assembled = PETSC_FALSE;
6312   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6313 
6314   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6315   PetscCallMPI(MPI_Comm_size(comm,&size));
6316   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6317   PetscCall(PetscLayoutSetUp(mat->rmap));
6318   PetscCall(PetscLayoutSetUp(mat->cmap));
6319   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6320   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6321   PetscCall(MatGetLocalSize(mat,&m,&n));
6322   PetscCall(MatGetSize(mat,&M,&N));
6323 
6324   /* ---------------------------------------------------------------------------*/
6325   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6326   /* entries come first, then local rows, then remote rows.                     */
6327   /* ---------------------------------------------------------------------------*/
6328   PetscCount n1 = coo_n,*perm1;
6329   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6330   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6331   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6332   PetscCall(PetscArraycpy(j1,coo_j,n1));
6333   for (k=0; k<n1; k++) perm1[k] = k;
6334 
6335   /* Manipulate indices so that entries with negative row or col indices will have smallest
6336      row indices, local entries will have greater but negative row indices, and remote entries
6337      will have positive row indices.
6338   */
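  /* For example (hypothetical values), with [rstart,rend) = [10,20) and off-process entries not being ignored:
     an entry with i = -1 (or j = -1) gets i1[k] = PETSC_MIN_INT, a local entry with i = 12 becomes
     12 - PETSC_MAX_INT (negative, but greater than PETSC_MIN_INT), and a remote entry with i = 35 keeps i = 35.
     Sorting by row below then yields the desired ordering: ignored, local, remote. */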
6339   for (k=0; k<n1; k++) {
6340     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6341     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6342     else {
6343       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but there are entries destined for remote rows");
6344       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6345     }
6346   }
6347 
6348   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6349   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6350   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6351   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6352   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6353 
6354   /* ---------------------------------------------------------------------------*/
6355   /*           Split local rows into diag/offdiag portions                      */
6356   /* ---------------------------------------------------------------------------*/
6357   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6358   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6359   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6360 
6361   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6362   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6363   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6364 
6365   /* ---------------------------------------------------------------------------*/
6366   /*           Send remote rows to their owner                                  */
6367   /* ---------------------------------------------------------------------------*/
6368   /* Find which rows should be sent to which remote ranks*/
6369   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6370   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6371   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6372   const PetscInt *ranges;
6373   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6374 
6375   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6376   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6377   for (k=rem; k<n1;) {
6378     PetscMPIInt  owner;
6379     PetscInt     firstRow,lastRow;
6380 
6381     /* Locate a row range */
6382     firstRow = i1[k]; /* first row of this owner */
6383     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6384     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6385 
6386     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6387     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6388 
6389     /* All entries in [k,p) belong to this remote owner */
6390     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6391       PetscMPIInt *sendto2;
6392       PetscInt    *nentries2;
6393       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6394 
6395       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6396       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6397       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend));
6398       PetscCall(PetscFree2(sendto,nentries));
6399       sendto      = sendto2;
6400       nentries    = nentries2;
6401       maxNsend    = maxNsend2;
6402     }
6403     sendto[nsend]   = owner;
6404     nentries[nsend] = p - k;
6405     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6406     nsend++;
6407     k = p;
6408   }
6409 
6410   /* Build 1st SF to know offsets on remote to send data */
6411   PetscSF     sf1;
6412   PetscInt    nroots = 1,nroots2 = 0;
6413   PetscInt    nleaves = nsend,nleaves2 = 0;
6414   PetscInt    *offsets;
6415   PetscSFNode *iremote;
6416 
6417   PetscCall(PetscSFCreate(comm,&sf1));
6418   PetscCall(PetscMalloc1(nsend,&iremote));
6419   PetscCall(PetscMalloc1(nsend,&offsets));
6420   for (k=0; k<nsend; k++) {
6421     iremote[k].rank  = sendto[k];
6422     iremote[k].index = 0;
6423     nleaves2        += nentries[k];
6424     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6425   }
6426   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6427   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6428   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Should nroots2 overflow, it is caught by the offsets[] check below */
6429   PetscCall(PetscSFDestroy(&sf1));
6430   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
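  /* An informal reading of the fetch-and-op above: nroots2 ends up as the total number of entries this
     rank will receive, and offsets[k] is the starting slot reserved on rank sendto[k] for this rank's
     nentries[k] entries; the actual delivery is performed by sf2, built next. */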
6431 
6432   /* Build 2nd SF to send remote COOs to their owner */
6433   PetscSF sf2;
6434   nroots  = nroots2;
6435   nleaves = nleaves2;
6436   PetscCall(PetscSFCreate(comm,&sf2));
6437   PetscCall(PetscSFSetFromOptions(sf2));
6438   PetscCall(PetscMalloc1(nleaves,&iremote));
6439   p       = 0;
6440   for (k=0; k<nsend; k++) {
6441     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6442     for (q=0; q<nentries[k]; q++,p++) {
6443       iremote[p].rank  = sendto[k];
6444       iremote[p].index = offsets[k] + q;
6445     }
6446   }
6447   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6448 
6449   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6450   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6451 
6452   /* Send the remote COOs to their owner */
6453   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6454   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6455   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6456   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6457   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6458   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6459   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6460 
6461   PetscCall(PetscFree(offsets));
6462   PetscCall(PetscFree2(sendto,nentries));
6463 
6464   /* ---------------------------------------------------------------*/
6465   /* Sort received COOs by row along with the permutation array     */
6466   /* ---------------------------------------------------------------*/
6467   for (k=0; k<n2; k++) perm2[k] = k;
6468   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6469 
6470   /* ---------------------------------------------------------------*/
6471   /* Split received COOs into diag/offdiag portions                 */
6472   /* ---------------------------------------------------------------*/
6473   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6474   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6475   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6476 
6477   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6478   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6479 
6480   /* --------------------------------------------------------------------------*/
6481   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6482   /* --------------------------------------------------------------------------*/
6483   PetscInt   *Ai,*Bi;
6484   PetscInt   *Aj,*Bj;
6485 
6486   PetscCall(PetscMalloc1(m+1,&Ai));
6487   PetscCall(PetscMalloc1(m+1,&Bi));
6488   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6489   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6490 
6491   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6492   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6493   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6494   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6495   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6496 
6497   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6498   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6499 
6500   /* --------------------------------------------------------------------------*/
6501   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6502   /* expect most nonzeros in A/B to have local contributing entries            */
6503   /* --------------------------------------------------------------------------*/
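  /* A sketch with made-up sizes: if Annz1 = 2 with Aimap1 = {0,2} and Ajmap1 = {0,1,3}, and A ends up
     with Annz = 4 nonzeros, then Ajmap1_new = {0,1,1,3,3}: nonzero 0 of A is fed by COO entries [0,1),
     nonzero 1 by none, nonzero 2 by [1,3), and nonzero 3 by none. */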
6504   PetscInt Annz = Ai[m];
6505   PetscInt Bnnz = Bi[m];
6506   PetscCount *Ajmap1_new,*Bjmap1_new;
6507 
6508   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6509   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6510 
6511   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6512   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6513 
6514   PetscCall(PetscFree(Aimap1));
6515   PetscCall(PetscFree(Ajmap1));
6516   PetscCall(PetscFree(Bimap1));
6517   PetscCall(PetscFree(Bjmap1));
6518   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6519   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6520   PetscCall(PetscFree3(i1,j1,perm1));
6521   PetscCall(PetscFree3(i2,j2,perm2));
6522 
6523   Ajmap1 = Ajmap1_new;
6524   Bjmap1 = Bjmap1_new;
6525 
6526   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6527   if (Annz < Annz1 + Annz2) {
6528     PetscInt *Aj_new;
6529     PetscCall(PetscMalloc1(Annz,&Aj_new));
6530     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6531     PetscCall(PetscFree(Aj));
6532     Aj   = Aj_new;
6533   }
6534 
6535   if (Bnnz < Bnnz1 + Bnnz2) {
6536     PetscInt *Bj_new;
6537     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6538     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6539     PetscCall(PetscFree(Bj));
6540     Bj   = Bj_new;
6541   }
6542 
6543   /* --------------------------------------------------------------------------------*/
6544   /* Create new submatrices for on-process and off-process coupling                  */
6545   /* --------------------------------------------------------------------------------*/
6546   PetscScalar   *Aa,*Ba;
6547   MatType       rtype;
6548   Mat_SeqAIJ    *a,*b;
6549   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6550   PetscCall(PetscCalloc1(Bnnz,&Ba));
6551   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6552   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6553   PetscCall(MatDestroy(&mpiaij->A));
6554   PetscCall(MatDestroy(&mpiaij->B));
6555   PetscCall(MatGetRootType_Private(mat,&rtype));
6556   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6557   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6558   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6559 
6560   a = (Mat_SeqAIJ*)mpiaij->A->data;
6561   b = (Mat_SeqAIJ*)mpiaij->B->data;
6562   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6563   a->free_a       = b->free_a       = PETSC_TRUE;
6564   a->free_ij      = b->free_ij      = PETSC_TRUE;
6565 
6566   /* conversion must happen AFTER multiply setup */
6567   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6568   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6569   PetscCall(VecDestroy(&mpiaij->lvec));
6570   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6571   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6572 
6573   mpiaij->coo_n   = coo_n;
6574   mpiaij->coo_sf  = sf2;
6575   mpiaij->sendlen = nleaves;
6576   mpiaij->recvlen = nroots;
6577 
6578   mpiaij->Annz    = Annz;
6579   mpiaij->Bnnz    = Bnnz;
6580 
6581   mpiaij->Annz2   = Annz2;
6582   mpiaij->Bnnz2   = Bnnz2;
6583 
6584   mpiaij->Atot1   = Atot1;
6585   mpiaij->Atot2   = Atot2;
6586   mpiaij->Btot1   = Btot1;
6587   mpiaij->Btot2   = Btot2;
6588 
6589   mpiaij->Ajmap1  = Ajmap1;
6590   mpiaij->Aperm1  = Aperm1;
6591 
6592   mpiaij->Bjmap1  = Bjmap1;
6593   mpiaij->Bperm1  = Bperm1;
6594 
6595   mpiaij->Aimap2  = Aimap2;
6596   mpiaij->Ajmap2  = Ajmap2;
6597   mpiaij->Aperm2  = Aperm2;
6598 
6599   mpiaij->Bimap2  = Bimap2;
6600   mpiaij->Bjmap2  = Bjmap2;
6601   mpiaij->Bperm2  = Bperm2;
6602 
6603   mpiaij->Cperm1  = Cperm1;
6604 
6605   /* Allocate in preallocation. If not used, it has zero cost on host */
6606   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6607   PetscFunctionReturn(0);
6608 }
6609 
6610 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6611 {
6612   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6613   Mat                  A = mpiaij->A,B = mpiaij->B;
6614   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6615   PetscScalar          *Aa,*Ba;
6616   PetscScalar          *sendbuf = mpiaij->sendbuf;
6617   PetscScalar          *recvbuf = mpiaij->recvbuf;
6618   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6619   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6620   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6621   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6622 
6623   PetscFunctionBegin;
6624   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6625   PetscCall(MatSeqAIJGetArray(B,&Ba));
6626 
6627   /* Pack entries to be sent to remote */
6628   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6629 
6630   /* Send remote entries to their owner and overlap the communication with local computation */
6631   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6632   /* Add local entries to A and B */
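  /* Ajmap1[i..i+1) indexes Aperm1[], which lists the positions in v[] of the local COO entries
     contributing to the i-th stored nonzero of A, so duplicates are summed here; likewise for B below. */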
6633   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6634     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6635     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6636     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6637   }
6638   for (PetscCount i=0; i<Bnnz; i++) {
6639     PetscScalar sum = 0.0;
6640     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6641     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6642   }
6643   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6644 
6645   /* Add received remote entries to A and B */
6646   for (PetscCount i=0; i<Annz2; i++) {
6647     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6648   }
6649   for (PetscCount i=0; i<Bnnz2; i++) {
6650     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6651   }
6652   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6653   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6654   PetscFunctionReturn(0);
6655 }
6656 
6657 /* ----------------------------------------------------------------*/
6658 
6659 /*MC
6660    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6661 
6662    Options Database Keys:
6663 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6664 
6665    Level: beginner
6666 
6667    Notes:
6668     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6669     in this case the values associated with the rows and columns one passes in are set to zero
6670     in the matrix.
6671 
6672     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6673     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6674 
6675 .seealso: `MatCreateAIJ()`
6676 M*/
6677 
6678 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6679 {
6680   Mat_MPIAIJ     *b;
6681   PetscMPIInt    size;
6682 
6683   PetscFunctionBegin;
6684   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6685 
6686   PetscCall(PetscNewLog(B,&b));
6687   B->data       = (void*)b;
6688   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6689   B->assembled  = PETSC_FALSE;
6690   B->insertmode = NOT_SET_VALUES;
6691   b->size       = size;
6692 
6693   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6694 
6695   /* build cache for off array entries formed */
6696   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6697 
6698   b->donotstash  = PETSC_FALSE;
6699   b->colmap      = NULL;
6700   b->garray      = NULL;
6701   b->roworiented = PETSC_TRUE;
6702 
6703   /* stuff used for matrix vector multiply */
6704   b->lvec  = NULL;
6705   b->Mvctx = NULL;
6706 
6707   /* stuff for MatGetRow() */
6708   b->rowindices   = NULL;
6709   b->rowvalues    = NULL;
6710   b->getrowactive = PETSC_FALSE;
6711 
6712   /* flexible pointer used in CUSPARSE classes */
6713   b->spptr = NULL;
6714 
6715   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6716   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6717   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6718   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6719   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6720   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6721   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6722   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6723   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6724   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6725 #if defined(PETSC_HAVE_CUDA)
6726   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6727 #endif
6728 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6729   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6730 #endif
6731 #if defined(PETSC_HAVE_MKL_SPARSE)
6732   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6733 #endif
6734   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6735   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6736   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6737   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6738 #if defined(PETSC_HAVE_ELEMENTAL)
6739   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6740 #endif
6741 #if defined(PETSC_HAVE_SCALAPACK)
6742   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6743 #endif
6744   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6745   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6746 #if defined(PETSC_HAVE_HYPRE)
6747   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6748   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6749 #endif
6750   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6751   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6752   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6753   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6754   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6755   PetscFunctionReturn(0);
6756 }
6757 
6758 /*@C
6759      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6760          and "off-diagonal" part of the matrix in CSR format.
6761 
6762    Collective
6763 
6764    Input Parameters:
6765 +  comm - MPI communicator
6766 .  m - number of local rows (Cannot be PETSC_DECIDE)
6767 .  n - This value should be the same as the local size used in creating the
6768        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6769        it calculated if N is given). For square matrices n is almost always m.
6770 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6771 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6772 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6773 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6774 .   a - matrix values
6775 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6776 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6777 -   oa - matrix values
6778 
6779    Output Parameter:
6780 .   mat - the matrix
6781 
6782    Level: advanced
6783 
6784    Notes:
6785        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6786        must free the arrays once the matrix has been destroyed and not before.
6787 
6788        The i and j indices are 0 based
6789 
6790        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
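
       As an illustrative example (not taken from an actual PETSc test), suppose a process owns two rows
       and the first three global columns, and its local part of the matrix is

.vb
          1 2 0 | 0 3
          0 4 5 | 6 0
.ve

       Then the "diagonal" arrays are i = {0,2,4}, j = {0,1,1,2} (local column numbering), a = {1,2,4,5},
       and the "off-diagonal" arrays are oi = {0,1,2}, oj = {4,3} (global column numbering), oa = {3,6}.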
6791 
6792        This sets local rows and cannot be used to set off-processor values.
6793 
6794        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6795        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6796        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6797        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6798        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6799        communication if it is known that only local entries will be set.
6800 
6801 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6802           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6803 @*/
6804 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6805 {
6806   Mat_MPIAIJ     *maij;
6807 
6808   PetscFunctionBegin;
6809   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6810   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6811   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6812   PetscCall(MatCreate(comm,mat));
6813   PetscCall(MatSetSizes(*mat,m,n,M,N));
6814   PetscCall(MatSetType(*mat,MATMPIAIJ));
6815   maij = (Mat_MPIAIJ*) (*mat)->data;
6816 
6817   (*mat)->preallocated = PETSC_TRUE;
6818 
6819   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6820   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6821 
6822   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6823   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6824 
6825   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6826   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6827   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6828   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6829   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6830   PetscFunctionReturn(0);
6831 }
6832 
6833 typedef struct {
6834   Mat       *mp;    /* intermediate products */
6835   PetscBool *mptmp; /* is the intermediate product temporary ? */
6836   PetscInt  cp;     /* number of intermediate products */
6837 
6838   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6839   PetscInt    *startsj_s,*startsj_r;
6840   PetscScalar *bufa;
6841   Mat         P_oth;
6842 
6843   /* may take advantage of merging product->B */
6844   Mat Bloc; /* B-local by merging diag and off-diag */
6845 
6846   /* cusparse does not support splitting the symbolic and numeric phases.
6847      When api_user is true, we don't need to update the numerical values
6848      of the temporary storage */
6849   PetscBool reusesym;
6850 
6851   /* support for COO values insertion */
6852   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and are used as the MPI recv/send buffers, respectively */
6853   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6854   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6855   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6856   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6857   PetscMemType mtype;
6858 
6859   /* customization */
6860   PetscBool abmerge;
6861   PetscBool P_oth_bind;
6862 } MatMatMPIAIJBACKEND;
6863 
6864 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6865 {
6866   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6867   PetscInt            i;
6868 
6869   PetscFunctionBegin;
6870   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6871   PetscCall(PetscFree(mmdata->bufa));
6872   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6873   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6874   PetscCall(MatDestroy(&mmdata->P_oth));
6875   PetscCall(MatDestroy(&mmdata->Bloc));
6876   PetscCall(PetscSFDestroy(&mmdata->sf));
6877   for (i = 0; i < mmdata->cp; i++) {
6878     PetscCall(MatDestroy(&mmdata->mp[i]));
6879   }
6880   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6881   PetscCall(PetscFree(mmdata->own[0]));
6882   PetscCall(PetscFree(mmdata->own));
6883   PetscCall(PetscFree(mmdata->off[0]));
6884   PetscCall(PetscFree(mmdata->off));
6885   PetscCall(PetscFree(mmdata));
6886   PetscFunctionReturn(0);
6887 }
6888 
6889 /* Copy selected n entries with indices in idx[] of A to v[].
6890    If idx is NULL, copy the whole data array of A to v[]
6891  */
6892 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6893 {
6894   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6895 
6896   PetscFunctionBegin;
6897   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6898   if (f) {
6899     PetscCall((*f)(A,n,idx,v));
6900   } else {
6901     const PetscScalar *vv;
6902 
6903     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6904     if (n && idx) {
6905       PetscScalar    *w = v;
6906       const PetscInt *oi = idx;
6907       PetscInt       j;
6908 
6909       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6910     } else {
6911       PetscCall(PetscArraycpy(v,vv,n));
6912     }
6913     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6914   }
6915   PetscFunctionReturn(0);
6916 }
6917 
6918 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6919 {
6920   MatMatMPIAIJBACKEND *mmdata;
6921   PetscInt            i,n_d,n_o;
6922 
6923   PetscFunctionBegin;
6924   MatCheckProduct(C,1);
6925   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6926   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6927   if (!mmdata->reusesym) { /* update temporary matrices */
6928     if (mmdata->P_oth) {
6929       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6930     }
6931     if (mmdata->Bloc) {
6932       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6933     }
6934   }
6935   mmdata->reusesym = PETSC_FALSE;
6936 
6937   for (i = 0; i < mmdata->cp; i++) {
6938     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6939     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6940   }
6941   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6942     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6943 
6944     if (mmdata->mptmp[i]) continue;
6945     if (noff) {
6946       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6947 
6948       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6949       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6950       n_o += noff;
6951       n_d += nown;
6952     } else {
6953       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6954 
6955       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6956       n_d += mm->nz;
6957     }
6958   }
6959   if (mmdata->hasoffproc) { /* offprocess insertion */
6960     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6961     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6962   }
6963   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6964   PetscFunctionReturn(0);
6965 }
6966 
6967 /* Support for Pt * A, A * P, or Pt * A * P */
6968 #define MAX_NUMBER_INTERMEDIATE 4
6969 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6970 {
6971   Mat_Product            *product = C->product;
6972   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6973   Mat_MPIAIJ             *a,*p;
6974   MatMatMPIAIJBACKEND    *mmdata;
6975   ISLocalToGlobalMapping P_oth_l2g = NULL;
6976   IS                     glob = NULL;
6977   const char             *prefix;
6978   char                   pprefix[256];
6979   const PetscInt         *globidx,*P_oth_idx;
6980   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6981   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6982   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6983                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6984                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6985   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
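                                                                                          /* For instance (from the cases below): the A_diag*P_diag product uses rmapt=cmapt=1, while     */
                                                                                          /* products involving P_oth or merged local matrices use type-2 maps through a local-to-global */
                                                                                          /* table (garray, globidx, or P_oth_idx)                                                       */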
6986 
6987   MatProductType         ptype;
6988   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6989   PetscMPIInt            size;
6990 
6991   PetscFunctionBegin;
6992   MatCheckProduct(C,1);
6993   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6994   ptype = product->type;
6995   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6996     ptype = MATPRODUCT_AB;
6997     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6998   }
6999   switch (ptype) {
7000   case MATPRODUCT_AB:
7001     A = product->A;
7002     P = product->B;
7003     m = A->rmap->n;
7004     n = P->cmap->n;
7005     M = A->rmap->N;
7006     N = P->cmap->N;
7007     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7008     break;
7009   case MATPRODUCT_AtB:
7010     P = product->A;
7011     A = product->B;
7012     m = P->cmap->n;
7013     n = A->cmap->n;
7014     M = P->cmap->N;
7015     N = A->cmap->N;
7016     hasoffproc = PETSC_TRUE;
7017     break;
7018   case MATPRODUCT_PtAP:
7019     A = product->A;
7020     P = product->B;
7021     m = P->cmap->n;
7022     n = P->cmap->n;
7023     M = P->cmap->N;
7024     N = P->cmap->N;
7025     hasoffproc = PETSC_TRUE;
7026     break;
7027   default:
7028     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7029   }
7030   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
7031   if (size == 1) hasoffproc = PETSC_FALSE;
7032 
7033   /* defaults */
7034   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
7035     mp[i]    = NULL;
7036     mptmp[i] = PETSC_FALSE;
7037     rmapt[i] = -1;
7038     cmapt[i] = -1;
7039     rmapa[i] = NULL;
7040     cmapa[i] = NULL;
7041   }
7042 
7043   /* customization */
7044   PetscCall(PetscNew(&mmdata));
7045   mmdata->reusesym = product->api_user;
7046   if (ptype == MATPRODUCT_AB) {
7047     if (product->api_user) {
7048       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
7049       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7050       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7051       PetscOptionsEnd();
7052     } else {
7053       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
7054       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7055       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7056       PetscOptionsEnd();
7057     }
7058   } else if (ptype == MATPRODUCT_PtAP) {
7059     if (product->api_user) {
7060       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7061       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7062       PetscOptionsEnd();
7063     } else {
7064       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7065       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7066       PetscOptionsEnd();
7067     }
7068   }
7069   a = (Mat_MPIAIJ*)A->data;
7070   p = (Mat_MPIAIJ*)P->data;
7071   PetscCall(MatSetSizes(C,m,n,M,N));
7072   PetscCall(PetscLayoutSetUp(C->rmap));
7073   PetscCall(PetscLayoutSetUp(C->cmap));
7074   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7075   PetscCall(MatGetOptionsPrefix(C,&prefix));
7076 
7077   cp   = 0;
7078   switch (ptype) {
7079   case MATPRODUCT_AB: /* A * P */
7080     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7081 
7082     /* A_diag * P_local (merged or not) */
7083     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7084       /* P is product->B */
7085       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7086       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7087       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7088       PetscCall(MatProductSetFill(mp[cp],product->fill));
7089       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7090       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7091       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7092       mp[cp]->product->api_user = product->api_user;
7093       PetscCall(MatProductSetFromOptions(mp[cp]));
7094       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7095       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7096       PetscCall(ISGetIndices(glob,&globidx));
7097       rmapt[cp] = 1;
7098       cmapt[cp] = 2;
7099       cmapa[cp] = globidx;
7100       mptmp[cp] = PETSC_FALSE;
7101       cp++;
7102     } else { /* A_diag * P_diag and A_diag * P_off */
7103       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7104       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7105       PetscCall(MatProductSetFill(mp[cp],product->fill));
7106       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7107       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7108       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7109       mp[cp]->product->api_user = product->api_user;
7110       PetscCall(MatProductSetFromOptions(mp[cp]));
7111       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7112       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7113       rmapt[cp] = 1;
7114       cmapt[cp] = 1;
7115       mptmp[cp] = PETSC_FALSE;
7116       cp++;
7117       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7118       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7119       PetscCall(MatProductSetFill(mp[cp],product->fill));
7120       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7121       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7122       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7123       mp[cp]->product->api_user = product->api_user;
7124       PetscCall(MatProductSetFromOptions(mp[cp]));
7125       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7126       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7127       rmapt[cp] = 1;
7128       cmapt[cp] = 2;
7129       cmapa[cp] = p->garray;
7130       mptmp[cp] = PETSC_FALSE;
7131       cp++;
7132     }
7133 
7134     /* A_off * P_other */
7135     if (mmdata->P_oth) {
7136       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7137       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7138       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7139       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7140       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7141       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7142       PetscCall(MatProductSetFill(mp[cp],product->fill));
7143       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7144       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7145       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7146       mp[cp]->product->api_user = product->api_user;
7147       PetscCall(MatProductSetFromOptions(mp[cp]));
7148       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7149       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7150       rmapt[cp] = 1;
7151       cmapt[cp] = 2;
7152       cmapa[cp] = P_oth_idx;
7153       mptmp[cp] = PETSC_FALSE;
7154       cp++;
7155     }
7156     break;
7157 
7158   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7159     /* A is product->B */
7160     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7161     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7162       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7163       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7164       PetscCall(MatProductSetFill(mp[cp],product->fill));
7165       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7166       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7167       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7168       mp[cp]->product->api_user = product->api_user;
7169       PetscCall(MatProductSetFromOptions(mp[cp]));
7170       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7171       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7172       PetscCall(ISGetIndices(glob,&globidx));
7173       rmapt[cp] = 2;
7174       rmapa[cp] = globidx;
7175       cmapt[cp] = 2;
7176       cmapa[cp] = globidx;
7177       mptmp[cp] = PETSC_FALSE;
7178       cp++;
7179     } else {
7180       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7181       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7182       PetscCall(MatProductSetFill(mp[cp],product->fill));
7183       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7184       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7185       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7186       mp[cp]->product->api_user = product->api_user;
7187       PetscCall(MatProductSetFromOptions(mp[cp]));
7188       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7189       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7190       PetscCall(ISGetIndices(glob,&globidx));
7191       rmapt[cp] = 1;
7192       cmapt[cp] = 2;
7193       cmapa[cp] = globidx;
7194       mptmp[cp] = PETSC_FALSE;
7195       cp++;
7196       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7197       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7198       PetscCall(MatProductSetFill(mp[cp],product->fill));
7199       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7200       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7201       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7202       mp[cp]->product->api_user = product->api_user;
7203       PetscCall(MatProductSetFromOptions(mp[cp]));
7204       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7205       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7206       rmapt[cp] = 2;
7207       rmapa[cp] = p->garray;
7208       cmapt[cp] = 2;
7209       cmapa[cp] = globidx;
7210       mptmp[cp] = PETSC_FALSE;
7211       cp++;
7212     }
7213     break;
7214   case MATPRODUCT_PtAP:
7215     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7216     /* P is product->B */
7217     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7218     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7219     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7220     PetscCall(MatProductSetFill(mp[cp],product->fill));
7221     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7222     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7223     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7224     mp[cp]->product->api_user = product->api_user;
7225     PetscCall(MatProductSetFromOptions(mp[cp]));
7226     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7227     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7228     PetscCall(ISGetIndices(glob,&globidx));
7229     rmapt[cp] = 2;
7230     rmapa[cp] = globidx;
7231     cmapt[cp] = 2;
7232     cmapa[cp] = globidx;
7233     mptmp[cp] = PETSC_FALSE;
7234     cp++;
7235     if (mmdata->P_oth) {
7236       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7237       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7238       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7239       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7240       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7241       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7242       PetscCall(MatProductSetFill(mp[cp],product->fill));
7243       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7244       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7245       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7246       mp[cp]->product->api_user = product->api_user;
7247       PetscCall(MatProductSetFromOptions(mp[cp]));
7248       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7249       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7250       mptmp[cp] = PETSC_TRUE;
7251       cp++;
7252       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7253       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7254       PetscCall(MatProductSetFill(mp[cp],product->fill));
7255       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7256       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7257       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7258       mp[cp]->product->api_user = product->api_user;
7259       PetscCall(MatProductSetFromOptions(mp[cp]));
7260       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7261       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7262       rmapt[cp] = 2;
7263       rmapa[cp] = globidx;
7264       cmapt[cp] = 2;
7265       cmapa[cp] = P_oth_idx;
7266       mptmp[cp] = PETSC_FALSE;
7267       cp++;
7268     }
7269     break;
7270   default:
7271     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7272   }
7273   /* sanity check */
7274   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7275 
7276   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7277   for (i = 0; i < cp; i++) {
7278     mmdata->mp[i]    = mp[i];
7279     mmdata->mptmp[i] = mptmp[i];
7280   }
7281   mmdata->cp = cp;
7282   C->product->data       = mmdata;
7283   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7284   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7285 
7286   /* memory type */
7287   mmdata->mtype = PETSC_MEMTYPE_HOST;
7288   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7289   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7290   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7291   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7292 
7293   /* prepare coo coordinates for values insertion */
7294 
7295   /* count total nonzeros of those intermediate seqaij Mats
7296     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7297     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7298     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7299   */
7300   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7301     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7302     if (mptmp[cp]) continue;
7303     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7304       const PetscInt *rmap = rmapa[cp];
7305       const PetscInt mr = mp[cp]->rmap->n;
7306       const PetscInt rs = C->rmap->rstart;
7307       const PetscInt re = C->rmap->rend;
7308       const PetscInt *ii  = mm->i;
7309       for (i = 0; i < mr; i++) {
7310         const PetscInt gr = rmap[i];
7311         const PetscInt nz = ii[i+1] - ii[i];
7312         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7313         else ncoo_oown += nz; /* this row is local */
7314       }
7315     } else ncoo_d += mm->nz;
7316   }
7317 
7318   /*
7319     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7320 
7321     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7322 
7323     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7324 
7325     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
7326     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7327     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7328 
7329     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7330     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7331   */
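  /* A made-up illustration: with cp = 2 kept products where mp[0] contributes 3 off-process nonzeros and
     mp[1] contributes 2, off[0] points to a single array of 5 indices and off[] = {base, base+3, base+5},
     so off[p+1]-off[p] recovers the per-product counts; own[] is laid out the same way for local insertions. */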
7332   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7333   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7334 
7335   /* gather (i,j) of nonzeros inserted by remote procs */
7336   if (hasoffproc) {
7337     PetscSF  msf;
7338     PetscInt ncoo2,*coo_i2,*coo_j2;
7339 
7340     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7341     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7342     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7343 
7344     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7345       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7346       PetscInt   *idxoff = mmdata->off[cp];
7347       PetscInt   *idxown = mmdata->own[cp];
7348       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7349         const PetscInt *rmap = rmapa[cp];
7350         const PetscInt *cmap = cmapa[cp];
7351         const PetscInt *ii  = mm->i;
7352         PetscInt       *coi = coo_i + ncoo_o;
7353         PetscInt       *coj = coo_j + ncoo_o;
7354         const PetscInt mr = mp[cp]->rmap->n;
7355         const PetscInt rs = C->rmap->rstart;
7356         const PetscInt re = C->rmap->rend;
7357         const PetscInt cs = C->cmap->rstart;
7358         for (i = 0; i < mr; i++) {
7359           const PetscInt *jj = mm->j + ii[i];
7360           const PetscInt gr  = rmap[i];
7361           const PetscInt nz  = ii[i+1] - ii[i];
7362           if (gr < rs || gr >= re) { /* this is an offproc row */
7363             for (j = ii[i]; j < ii[i+1]; j++) {
7364               *coi++ = gr;
7365               *idxoff++ = j;
7366             }
7367             if (!cmapt[cp]) { /* already global */
7368               for (j = 0; j < nz; j++) *coj++ = jj[j];
7369             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7370               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7371             } else { /* offdiag */
7372               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7373             }
7374             ncoo_o += nz;
7375           } else { /* this is a local row */
7376             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7377           }
7378         }
7379       }
7380       mmdata->off[cp + 1] = idxoff;
7381       mmdata->own[cp + 1] = idxown;
7382     }
7383 
7384     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7385     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7386     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7387     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7388     ncoo = ncoo_d + ncoo_oown + ncoo2;
7389     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7390     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7391     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7392     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7393     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7394     PetscCall(PetscFree2(coo_i,coo_j));
7395     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7396     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7397     coo_i = coo_i2;
7398     coo_j = coo_j2;
7399   } else { /* no offproc values insertion */
7400     ncoo = ncoo_d;
7401     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7402 
7403     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7404     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7405     PetscCall(PetscSFSetUp(mmdata->sf));
7406   }
7407   mmdata->hasoffproc = hasoffproc;
7408 
7409   /* gather (i,j) of nonzeros inserted locally */
7410   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7411     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7412     PetscInt       *coi = coo_i + ncoo_d;
7413     PetscInt       *coj = coo_j + ncoo_d;
7414     const PetscInt *jj  = mm->j;
7415     const PetscInt *ii  = mm->i;
7416     const PetscInt *cmap = cmapa[cp];
7417     const PetscInt *rmap = rmapa[cp];
7418     const PetscInt mr = mp[cp]->rmap->n;
7419     const PetscInt rs = C->rmap->rstart;
7420     const PetscInt re = C->rmap->rend;
7421     const PetscInt cs = C->cmap->rstart;
7422 
7423     if (mptmp[cp]) continue;
7424     if (rmapt[cp] == 1) { /* consecutive rows */
7425       /* fill coo_i */
7426       for (i = 0; i < mr; i++) {
7427         const PetscInt gr = i + rs;
7428         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7429       }
7430       /* fill coo_j */
7431       if (!cmapt[cp]) { /* type-0, already global */
7432         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7433       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7434         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7435       } else { /* type-2, local to global for sparse columns */
7436         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7437       }
7438       ncoo_d += mm->nz;
7439     } else if (rmapt[cp] == 2) { /* sparse rows */
7440       for (i = 0; i < mr; i++) {
7441         const PetscInt *jj = mm->j + ii[i];
7442         const PetscInt gr  = rmap[i];
7443         const PetscInt nz  = ii[i+1] - ii[i];
7444         if (gr >= rs && gr < re) { /* local rows */
7445           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7446           if (!cmapt[cp]) { /* type-0, already global */
7447             for (j = 0; j < nz; j++) *coj++ = jj[j];
7448           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7449             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7450           } else { /* type-2, local to global for sparse columns */
7451             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7452           }
7453           ncoo_d += nz;
7454         }
7455       }
7456     }
7457   }
7458   if (glob) {
7459     PetscCall(ISRestoreIndices(glob,&globidx));
7460   }
7461   PetscCall(ISDestroy(&glob));
7462   if (P_oth_l2g) {
7463     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7464   }
7465   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7466   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7467   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7468 
7469   /* preallocate with COO data */
7470   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7471   PetscCall(PetscFree2(coo_i,coo_j));
7472   PetscFunctionReturn(0);
7473 }
7474 
7475 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7476 {
7477   Mat_Product *product = mat->product;
7478 #if defined(PETSC_HAVE_DEVICE)
7479   PetscBool    match   = PETSC_FALSE;
7480   PetscBool    usecpu  = PETSC_FALSE;
7481 #else
7482   PetscBool    match   = PETSC_TRUE;
7483 #endif
7484 
7485   PetscFunctionBegin;
7486   MatCheckProduct(mat,1);
7487 #if defined(PETSC_HAVE_DEVICE)
7488   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7489     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7490   }
7491   if (match) { /* we can always fallback to the CPU if requested */
7492     switch (product->type) {
7493     case MATPRODUCT_AB:
7494       if (product->api_user) {
7495         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7496         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7497         PetscOptionsEnd();
7498       } else {
7499         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7500         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7501         PetscOptionsEnd();
7502       }
7503       break;
7504     case MATPRODUCT_AtB:
7505       if (product->api_user) {
7506         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7507         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7508         PetscOptionsEnd();
7509       } else {
7510         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7511         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7512         PetscOptionsEnd();
7513       }
7514       break;
7515     case MATPRODUCT_PtAP:
7516       if (product->api_user) {
7517         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7518         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7519         PetscOptionsEnd();
7520       } else {
7521         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7522         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7523         PetscOptionsEnd();
7524       }
7525       break;
7526     default:
7527       break;
7528     }
7529     match = (PetscBool)!usecpu;
7530   }
7531 #endif
7532   if (match) {
7533     switch (product->type) {
7534     case MATPRODUCT_AB:
7535     case MATPRODUCT_AtB:
7536     case MATPRODUCT_PtAP:
7537       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7538       break;
7539     default:
7540       break;
7541     }
7542   }
7543   /* fall back to MPIAIJ ops */
7544   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7545   PetscFunctionReturn(0);
7546 }
7547 
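/*
   Illustrative note: the CPU-fallback options registered above are consumed at MatProductSetFromOptions() time.
   For example, a run using the MatMatMult() API could request the CPU path with

       -matmatmult_backend_cpu

   while a run driving the same product through MatProductCreate()/MatProductSetType(mat,MATPRODUCT_AB) would use

       -mat_product_algorithm_backend_cpu

   (the option names are taken from the code above; the surrounding invocation is hypothetical).
*/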
7548 /*
7549    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7550 
7551    n - the number of block indices in cc[]
7552    cc - the block indices (must be large enough to contain the indices)
7553 */
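
   Example (illustrative): with bs = 2, a row with (sorted) column indices {0, 1, 4, 5, 7}
   collapses to the block indices cc = {0, 2, 3}, so n = 3.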
7554 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7555 {
7556   PetscInt       cnt = -1,nidx,j;
7557   const PetscInt *idx;
7558 
7559   PetscFunctionBegin;
7560   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7561   if (nidx) {
7562     cnt = 0;
7563     cc[cnt] = idx[0]/bs;
7564     for (j=1; j<nidx; j++) {
7565       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7566     }
7567   }
7568   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7569   *n = cnt+1;
7570   PetscFunctionReturn(0);
7571 }
7572 
7573 /*
7574     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7575 
7576     ncollapsed - the number of block indices
7577     collapsed - the block indices, returned as a pointer into one of the workspace arrays w0/w1/w2 (each of which must be large enough to hold the indices)
7578 */
7579 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7580 {
7581   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7582 
7583   PetscFunctionBegin;
7584   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7585   for (i=start+1; i<start+bs; i++) {
7586     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7587     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7588     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7589   }
7590   *ncollapsed = nprev;
7591   if (collapsed) *collapsed  = cprev;
7592   PetscFunctionReturn(0);
7593 }
7594 
7595 /* -------------------------------------------------------------------------- */
7596 /*
7597  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7598 
7599  Input Parameters:
7600 + Amat - the matrix
7601 . symmetrize - make the result symmetric
7602 - scale - scale with the diagonal
7603 
7604  Output Parameter:
7605 . a_Gmat - the output scalar graph, with entries >= 0
7606 
7607  */
7608 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7609 {
7610   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7611   MPI_Comm       comm;
7612   Mat            Gmat;
7613   PetscBool      ismpiaij,isseqaij;
7614   Mat            a, b, c;
7615   MatType        jtype;
7616 
7617   PetscFunctionBegin;
7618   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7619   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7620   PetscCall(MatGetSize(Amat, &MM, &NN));
7621   PetscCall(MatGetBlockSize(Amat, &bs));
7622   nloc = (Iend-Istart)/bs;
7623 
7624   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7625   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7626   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7627 
7628   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7629   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could provide a fast
7630      implementation */
7631   if (bs > 1) {
7632     PetscCall(MatGetType(Amat,&jtype));
7633     PetscCall(MatCreate(comm, &Gmat));
7634     PetscCall(MatSetType(Gmat, jtype));
7635     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7636     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7637     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7638       PetscInt  *d_nnz, *o_nnz;
7639       MatScalar *aa,val,AA[4096];
7640       PetscInt  *aj,*ai,AJ[4096],nc;
7641       if (isseqaij) { a = Amat; b = NULL; }
7642       else {
7643         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7644         a = d->A; b = d->B;
7645       }
7646       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7647       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7648       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7649         PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
7650         const PetscInt *cols;
7651         for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
7652           PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
7653           nnz[brow/bs] = jj/bs;
7654           if (jj%bs) ok = 0;
7655           if (cols) j0 = cols[0];
7656           else j0 = -1;
7657           PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
7658           if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
7659           for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
7660             PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
7661             if (jj%bs) ok = 0;
7662             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7663             if (nnz[brow/bs] != jj/bs) ok = 0;
7664             PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
7665           }
7666           if (!ok) {
7667             PetscCall(PetscFree2(d_nnz,o_nnz));
7668             goto old_bs;
7669           }
7670         }
7671         PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Block row has %" PetscInt_FMT " block columns, which exceeds the fixed buffer size 4096",nmax);
7672       }
7673       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7674       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7675       PetscCall(PetscFree2(d_nnz,o_nnz));
7676       // diag
7677       for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
7678         Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
7679         ai = aseq->i;
7680         n  = ai[brow+1] - ai[brow];
7681         aj = aseq->j + ai[brow];
7682         for (int k=0; k<n; k += bs) { // block columns
7683           AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
7684           val = 0;
7685           for (int ii=0; ii<bs; ii++) { // rows in block
7686             aa = aseq->a + ai[brow+ii] + k;
7687             for (int jj=0; jj<bs; jj++) { // columns in block
7688               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7689             }
7690           }
7691           AA[k/bs] = val;
7692         }
7693         grow = Istart/bs + brow/bs;
7694         PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
7695       }
7696       // off-diag
7697       if (ismpiaij) {
7698         Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
7699         const PetscScalar *vals;
7700         const PetscInt    *cols, *garray = aij->garray;
7701         PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"MPIAIJ matrix has no garray");
7702         for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
7703           PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
7704           for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
7705             AA[k/bs] = 0;
7706             AJ[cidx] = garray[cols[k]]/bs;
7707           }
7708           nc = ncols/bs;
7709           PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
7710           for (int ii=0; ii<bs; ii++) { // rows in block
7711             PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
7712             for (int k=0; k<ncols; k += bs) {
7713               for (int jj=0; jj<bs; jj++) { // cols in block
7714                 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
7715               }
7716             }
7717             PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
7718           }
7719           grow = Istart/bs + brow/bs;
7720           PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
7721         }
7722       }
7723       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7724       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7725     } else {
7726       const PetscScalar *vals;
7727       const PetscInt    *idx;
7728       PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
7729       old_bs:
7730       /*
7731        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7732        */
7733       PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
7734       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7735       if (isseqaij) {
7736         PetscInt max_d_nnz;
7737         /*
7738          Determine exact preallocation count for (sequential) scalar matrix
7739          */
7740         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7741         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7742         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7743         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7744           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7745         }
7746         PetscCall(PetscFree3(w0,w1,w2));
7747       } else if (ismpiaij) {
7748         Mat            Daij,Oaij;
7749         const PetscInt *garray;
7750         PetscInt       max_d_nnz;
7751         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7752         /*
7753          Determine exact preallocation count for diagonal block portion of scalar matrix
7754          */
7755         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7756         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7757         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7758         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7759           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7760         }
7761         PetscCall(PetscFree3(w0,w1,w2));
7762         /*
7763          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7764          */
7765         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7766           o_nnz[jj] = 0;
7767           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7768             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7769             o_nnz[jj] += ncols;
7770             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7771           }
7772           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7773         }
7774       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7775       /* get scalar copy (norms) of matrix */
7776       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7777       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7778       PetscCall(PetscFree2(d_nnz,o_nnz));
7779       for (Ii = Istart; Ii < Iend; Ii++) {
7780         PetscInt dest_row = Ii/bs;
7781         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7782         for (jj=0; jj<ncols; jj++) {
7783           PetscInt    dest_col = idx[jj]/bs;
7784           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7785           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7786         }
7787         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7788       }
7789       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7790       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7791     }
7792   } else {
7793     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7794      procedure via MatAbs API */
7795     /* just copy scalar matrix & abs() */
7796     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7797     if (isseqaij) { a = Gmat; b = NULL; }
7798     else {
7799       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7800       a = d->A; b = d->B;
7801     }
7802     /* abs */
7803     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7804       MatInfo     info;
7805       PetscScalar *avals;
7806       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7807       PetscCall(MatSeqAIJGetArray(c,&avals));
7808       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7809       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7810     }
7811   }
7812   if (symmetrize) {
7813     PetscBool issym;
7814     PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
7815     if (!issym) {
7816       Mat matTrans;
7817       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7818       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7819       PetscCall(MatDestroy(&matTrans));
7820     }
7821     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7822   } else {
7823     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7824   }
7825   if (scale) {
7826     /* symmetrically scale Gmat (D^{-1/2} G D^{-1/2} with D = |diag(G)|) so that all diagonal entries become +1 or -1 */
7827     Vec               diag;
7828     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7829     PetscCall(MatGetDiagonal(Gmat, diag));
7830     PetscCall(VecReciprocal(diag));
7831     PetscCall(VecSqrtAbs(diag));
7832     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7833     PetscCall(VecDestroy(&diag));
7834   }
7835   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7836   *a_Gmat = Gmat;
7837   PetscFunctionReturn(0);
7838 }
7839 
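/*
   A minimal usage sketch (illustrative only; this routine is PETSC_INTERN and is normally reached
   through PCGAMG's graph construction rather than called directly by applications):

     Mat A, G;
     // ... create and assemble A as MATAIJ/MATMPIAIJ, possibly with block size > 1 ...
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, &G)); // symmetrize and scale
     PetscCall(MatViewFromOptions(G, NULL, "-mat_graph_view"));
     PetscCall(MatDestroy(&G));
*/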
7840 /* -------------------------------------------------------------------------- */
7841 /*@C
7842    MatFilter_AIJ - filter out values with small absolute value
7843      With vfilter < 0 this does nothing, so it should not be called in that case.
7844 
7845    Collective on Mat
7846 
7847    Input Parameters:
7848 +   Gmat - the graph
7849 -   vfilter - threshold parameter [0,1)
7850 
7851    Output Parameter:
7852 .   filteredG - output filtered scalar graph
7853 
7854    Level: developer
7855 
7856    Notes:
7857     This is called before graph coarseners are called.
7858     This could go into Mat, move 'symm' to GAMG
7859 
7860 .seealso: `PCGAMGSetThreshold()`
7861 @*/
7862 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7863 {
7864   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7865   Mat               tGmat;
7866   MPI_Comm          comm;
7867   const PetscScalar *vals;
7868   const PetscInt    *idx;
7869   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7870   MatScalar         *AA; // this is checked in graph
7871   PetscBool         isseqaij;
7872   Mat               a, b, c;
7873   MatType           jtype;
7874 
7875   PetscFunctionBegin;
7876   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7877   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7878   PetscCall(MatGetType(Gmat,&jtype));
7879   PetscCall(MatCreate(comm, &tGmat));
7880   PetscCall(MatSetType(tGmat, jtype));
7881 
7882   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7883                Also, if the matrix is symmetric, can we skip this
7884                operation? It can be very expensive on large matrices. */
7885 
7886   // global sizes
7887   PetscCall(MatGetSize(Gmat, &MM, &NN));
7888   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7889   nloc = Iend - Istart;
7890   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7891   if (isseqaij) { a = Gmat; b = NULL; }
7892   else {
7893     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7894     a = d->A; b = d->B;
7895     garray = d->garray;
7896   }
7897   /* Determine upper bound on non-zeros needed in new filtered matrix */
7898   for (PetscInt row=0; row < nloc; row++) {
7899     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7900     d_nnz[row] = ncols;
7901     if (ncols>maxcols) maxcols=ncols;
7902     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7903   }
7904   if (b) {
7905     for (PetscInt row=0; row < nloc; row++) {
7906       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7907       o_nnz[row] = ncols;
7908       if (ncols>maxcols) maxcols=ncols;
7909       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7910     }
7911   }
7912   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7913   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7914   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7915   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7916   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7917   PetscCall(PetscFree2(d_nnz,o_nnz));
7918   //
7919   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7920   nnz0 = nnz1 = 0;
7921   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7922     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7923       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7924       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7925         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7926         if (PetscRealPart(sv) > vfilter) {
7927           nnz1++;
7928           PetscInt cid = idx[jj] + Istart; //diag
7929           if (c!=a) cid = garray[idx[jj]];
7930           AA[ncol_row] = vals[jj];
7931           AJ[ncol_row] = cid;
7932           ncol_row++;
7933         }
7934       }
7935       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7936       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7937     }
7938   }
7939   PetscCall(PetscFree2(AA,AJ));
7940   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7941   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7942   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7943 
7944   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7945                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7946                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7947 
7948   *filteredG = tGmat;
7949   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7950   PetscFunctionReturn(0);
7951 }
7952 
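/*
   A minimal usage sketch (illustrative only; this routine is PETSC_INTERN and is normally driven
   through PCGAMGSetThreshold()):

     Mat G, Gf;
     // ... build the scalar graph G, e.g. with MatCreateGraph_Simple_AIJ() ...
     PetscCall(MatFilter_AIJ(G, 0.01, &Gf)); // keep only entries with |value| > 0.01
     PetscCall(MatDestroy(&G));
*/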
7953 /*
7954     Special version for direct calls from Fortran
7955 */
7956 #include <petsc/private/fortranimpl.h>
7957 
7958 /* Change these macros so they can be used in a void function */
7959 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7960 #undef  PetscCall
7961 #define PetscCall(...) do {                                                                    \
7962     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7963     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7964       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7965       return;                                                                                  \
7966     }                                                                                          \
7967   } while (0)
7968 
7969 #undef SETERRQ
7970 #define SETERRQ(comm,ierr,...) do {                                                            \
7971     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7972     return;                                                                                    \
7973   } while (0)
7974 
7975 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7976 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7977 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7978 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7979 #else
7980 #endif
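/* Illustrative only: a Fortran caller reaching this routine directly passes the same arguments as the
   C signature below, with the error code returned through the trailing ierr argument, roughly

     call MatSetValuesMPIAIJ(mat, m, im, n, in, v, ADD_VALUES, ierr)

   (the Fortran-visible name and calling code here are hypothetical; the redefined macros above make
   error reporting work from this void C function). */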
7981 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7982 {
7983   Mat          mat  = *mmat;
7984   PetscInt     m    = *mm, n = *mn;
7985   InsertMode   addv = *maddv;
7986   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7987   PetscScalar  value;
7988 
7989   MatCheckPreallocated(mat,1);
7990   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7991   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7992   {
7993     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7994     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7995     PetscBool roworiented = aij->roworiented;
7996 
7997     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
7998     Mat        A                    = aij->A;
7999     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
8000     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
8001     MatScalar  *aa;
8002     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8003     Mat        B                    = aij->B;
8004     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
8005     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
8006     MatScalar  *ba;
8007     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8008      * cannot use "#if defined" inside a macro. */
8009     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8010 
8011     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
8012     PetscInt  nonew = a->nonew;
8013     MatScalar *ap1,*ap2;
8014 
8015     PetscFunctionBegin;
8016     PetscCall(MatSeqAIJGetArray(A,&aa));
8017     PetscCall(MatSeqAIJGetArray(B,&ba));
8018     for (i=0; i<m; i++) {
8019       if (im[i] < 0) continue;
8020       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
8021       if (im[i] >= rstart && im[i] < rend) {
8022         row      = im[i] - rstart;
8023         lastcol1 = -1;
8024         rp1      = aj + ai[row];
8025         ap1      = aa + ai[row];
8026         rmax1    = aimax[row];
8027         nrow1    = ailen[row];
8028         low1     = 0;
8029         high1    = nrow1;
8030         lastcol2 = -1;
8031         rp2      = bj + bi[row];
8032         ap2      = ba + bi[row];
8033         rmax2    = bimax[row];
8034         nrow2    = bilen[row];
8035         low2     = 0;
8036         high2    = nrow2;
8037 
8038         for (j=0; j<n; j++) {
8039           if (roworiented) value = v[i*n+j];
8040           else value = v[i+j*m];
8041           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8042           if (in[j] >= cstart && in[j] < cend) {
8043             col = in[j] - cstart;
8044             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
8045           } else if (in[j] < 0) continue;
8046           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8047             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
8048             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
8049           } else {
8050             if (mat->was_assembled) {
8051               if (!aij->colmap) {
8052                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8053               }
8054 #if defined(PETSC_USE_CTABLE)
8055               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
8056               col--;
8057 #else
8058               col = aij->colmap[in[j]] - 1;
8059 #endif
8060               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
8061                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8062                 col  =  in[j];
8063                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8064                 B        = aij->B;
8065                 b        = (Mat_SeqAIJ*)B->data;
8066                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
8067                 rp2      = bj + bi[row];
8068                 ap2      = ba + bi[row];
8069                 rmax2    = bimax[row];
8070                 nrow2    = bilen[row];
8071                 low2     = 0;
8072                 high2    = nrow2;
8073                 bm       = aij->B->rmap->n;
8074                 ba       = b->a;
8075                 inserted = PETSC_FALSE;
8076               }
8077             } else col = in[j];
8078             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
8079           }
8080         }
8081       } else if (!aij->donotstash) {
8082         if (roworiented) {
8083           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8084         } else {
8085           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8086         }
8087       }
8088     }
8089     PetscCall(MatSeqAIJRestoreArray(A,&aa));
8090     PetscCall(MatSeqAIJRestoreArray(B,&ba));
8091   }
8092   PetscFunctionReturnVoid();
8093 }
8094 
8095 /* Undefine these here since they were redefined from their original definitions above! No
8096  * other PETSc functions should be defined past this point, as it is impossible to recover the
8097  * original definitions */
8098 #undef PetscCall
8099 #undef SETERRQ
8100