xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bc996fdc2f4503fc01f6a6476f80e02484e6569c)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
10 {
11   Mat            B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
15   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
16   PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
17   PetscFunctionReturn(0);
18 }
19 
20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
21 {
22   Mat            B;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
26   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
27   PetscCall(MatDestroy(&B));
28   PetscFunctionReturn(0);
29 }
30 
31 /*MC
32    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
35    and MATMPIAIJ otherwise.  As a result, for single process communicators,
36   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
42 
43   Developer Notes:
44     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. This type also automatically switches over to use inodes when
45    enough of them exist.
46 
47   Level: beginner
48 
49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
50 M*/
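
/*
   Example (a minimal sketch, not compiled here): the manual page above recommends calling both
   preallocation routines so that the same code works for any communicator size. Here comm, the
   sizes m,n,M,N, and the per-row nonzero estimates d_nz,o_nz are placeholder values.

     Mat A;
     PetscCall(MatCreate(comm,&A));
     PetscCall(MatSetSizes(A,m,n,M,N));
     PetscCall(MatSetType(A,MATAIJ));                              // or -mat_type aij with MatSetFromOptions()
     PetscCall(MatSeqAIJSetPreallocation(A,d_nz,NULL));            // takes effect on a one-process communicator
     PetscCall(MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL));  // takes effect on a multi-process communicator
*/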
51 
52 /*MC
53    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
54 
55    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
56    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
57    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
58   for communicators controlling multiple processes.  It is recommended that you call both of
59   the above preallocation routines for simplicity.
60 
61    Options Database Keys:
62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
63 
64   Level: beginner
65 
66 .seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
67 M*/
68 
69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
70 {
71   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
72 
73   PetscFunctionBegin;
74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
75   A->boundtocpu = flg;
76 #endif
77   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
78   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
79 
80   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
81    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
82    * to differ from the parent matrix. */
83   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
84   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
85 
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
90 {
91   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
92 
93   PetscFunctionBegin;
94   if (mat->A) {
95     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
96     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
102 {
103   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
104   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
105   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
106   const PetscInt  *ia,*ib;
107   const MatScalar *aa,*bb,*aav,*bav;
108   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
109   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
110 
111   PetscFunctionBegin;
112   *keptrows = NULL;
113 
114   ia   = a->i;
115   ib   = b->i;
116   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
117   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
118   for (i=0; i<m; i++) {
119     na = ia[i+1] - ia[i];
120     nb = ib[i+1] - ib[i];
121     if (!na && !nb) {
122       cnt++;
123       goto ok1;
124     }
125     aa = aav + ia[i];
126     for (j=0; j<na; j++) {
127       if (aa[j] != 0.0) goto ok1;
128     }
129     bb = bav + ib[i];
130     for (j=0; j <nb; j++) {
131       if (bb[j] != 0.0) goto ok1;
132     }
133     cnt++;
134 ok1:;
135   }
136   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
137   if (!n0rows) {
138     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
140     PetscFunctionReturn(0);
141   }
142   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
143   cnt  = 0;
144   for (i=0; i<m; i++) {
145     na = ia[i+1] - ia[i];
146     nb = ib[i+1] - ib[i];
147     if (!na && !nb) continue;
148     aa = aav + ia[i];
149     for (j=0; j<na;j++) {
150       if (aa[j] != 0.0) {
151         rows[cnt++] = rstart + i;
152         goto ok2;
153       }
154     }
155     bb = bav + ib[i];
156     for (j=0; j<nb; j++) {
157       if (bb[j] != 0.0) {
158         rows[cnt++] = rstart + i;
159         goto ok2;
160       }
161     }
162 ok2:;
163   }
164   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
165   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
167   PetscFunctionReturn(0);
168 }
169 
170 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
171 {
172   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
173   PetscBool         cong;
174 
175   PetscFunctionBegin;
176   PetscCall(MatHasCongruentLayouts(Y,&cong));
177   if (Y->assembled && cong) {
178     PetscCall(MatDiagonalSet(aij->A,D,is));
179   } else {
180     PetscCall(MatDiagonalSet_Default(Y,D,is));
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
188   PetscInt       i,rstart,nrows,*rows;
189 
190   PetscFunctionBegin;
191   *zrows = NULL;
192   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
193   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
194   for (i=0; i<nrows; i++) rows[i] += rstart;
195   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
200 {
201   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
202   PetscInt          i,m,n,*garray = aij->garray;
203   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
204   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
205   PetscReal         *work;
206   const PetscScalar *dummy;
207 
208   PetscFunctionBegin;
209   PetscCall(MatGetSize(A,&m,&n));
210   PetscCall(PetscCalloc1(n,&work));
211   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
212   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
213   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
214   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
215   if (type == NORM_2) {
216     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
217       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
218     }
219     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
220       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
221     }
222   } else if (type == NORM_1) {
223     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
224       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
225     }
226     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
227       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
228     }
229   } else if (type == NORM_INFINITY) {
230     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
231       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
232     }
233     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
234       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
235     }
236   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
237     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
238       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
239     }
240     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
241       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
242     }
243   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
244     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
245       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
246     }
247     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
248       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
249     }
250   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
251   if (type == NORM_INFINITY) {
252     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
253   } else {
254     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
255   }
256   PetscCall(PetscFree(work));
257   if (type == NORM_2) {
258     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
259   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
260     for (i=0; i<n; i++) reductions[i] /= m;
261   }
262   PetscFunctionReturn(0);
263 }
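
/*
   Usage sketch (illustrative; assumes the public entry point MatGetColumnNorms(), which dispatches
   to the column-reduction routine above for MPIAIJ). The output array must have room for the global
   number of columns on every rank, since the reduction above is an Allreduce:

     PetscInt  N;
     PetscReal *norms;
     PetscCall(MatGetSize(A,NULL,&N));
     PetscCall(PetscMalloc1(N,&norms));
     PetscCall(MatGetColumnNorms(A,NORM_2,norms));
     PetscCall(PetscFree(norms));
*/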
264 
265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
266 {
267   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
268   IS              sis,gis;
269   const PetscInt  *isis,*igis;
270   PetscInt        n,*iis,nsis,ngis,rstart,i;
271 
272   PetscFunctionBegin;
273   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
274   PetscCall(MatFindNonzeroRows(a->B,&gis));
275   PetscCall(ISGetSize(gis,&ngis));
276   PetscCall(ISGetSize(sis,&nsis));
277   PetscCall(ISGetIndices(sis,&isis));
278   PetscCall(ISGetIndices(gis,&igis));
279 
280   PetscCall(PetscMalloc1(ngis+nsis,&iis));
281   PetscCall(PetscArraycpy(iis,igis,ngis));
282   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
283   n    = ngis + nsis;
284   PetscCall(PetscSortRemoveDupsInt(&n,iis));
285   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
286   for (i=0; i<n; i++) iis[i] += rstart;
287   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
288 
289   PetscCall(ISRestoreIndices(sis,&isis));
290   PetscCall(ISRestoreIndices(gis,&igis));
291   PetscCall(ISDestroy(&sis));
292   PetscCall(ISDestroy(&gis));
293   PetscFunctionReturn(0);
294 }
295 
296 /*
297   Local utility routine that creates a mapping from the global column
298 number to the local number in the off-diagonal part of the local
299 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
300 a slightly higher hash table cost; without it, it is not scalable (each process
301 has an order-N integer array) but is fast to access.
302 */
303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscInt       n = aij->B->cmap->n,i;
307 
308   PetscFunctionBegin;
309   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
310 #if defined(PETSC_USE_CTABLE)
311   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
312   for (i=0; i<n; i++) {
313     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
314   }
315 #else
316   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
317   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
318   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
319 #endif
320   PetscFunctionReturn(0);
321 }
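
/*
   The colmap built above is consulted as follows elsewhere in this file (see MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ()) to translate a global column index gcol into a local column of the
   off-diagonal block B; a result col < 0 means gcol is not (yet) a column of B:

   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscTableFind(aij->colmap,gcol+1,&col));
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/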
322 
323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
324 { \
325     if (col <= lastcol1)  low1 = 0;     \
326     else                 high1 = nrow1; \
327     lastcol1 = col;\
328     while (high1-low1 > 5) { \
329       t = (low1+high1)/2; \
330       if (rp1[t] > col) high1 = t; \
331       else              low1  = t; \
332     } \
333       for (_i=low1; _i<high1; _i++) { \
334         if (rp1[_i] > col) break; \
335         if (rp1[_i] == col) { \
336           if (addv == ADD_VALUES) { \
337             ap1[_i] += value;   \
338             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
339             (void)PetscLogFlops(1.0);   \
340            } \
341           else                    ap1[_i] = value; \
342           goto a_noinsert; \
343         } \
344       }  \
345       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
346       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
347       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
348       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
349       N = nrow1++ - 1; a->nz++; high1++; \
350       /* shift up all the later entries in this row */ \
351       PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
352       PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
353       rp1[_i] = col;  \
354       ap1[_i] = value;  \
355       A->nonzerostate++;\
356       a_noinsert: ; \
357       ailen[row] = nrow1; \
358 }
359 
360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
361   { \
362     if (col <= lastcol2) low2 = 0;                        \
363     else high2 = nrow2;                                   \
364     lastcol2 = col;                                       \
365     while (high2-low2 > 5) {                              \
366       t = (low2+high2)/2;                                 \
367       if (rp2[t] > col) high2 = t;                        \
368       else             low2  = t;                         \
369     }                                                     \
370     for (_i=low2; _i<high2; _i++) {                       \
371       if (rp2[_i] > col) break;                           \
372       if (rp2[_i] == col) {                               \
373         if (addv == ADD_VALUES) {                         \
374           ap2[_i] += value;                               \
375           (void)PetscLogFlops(1.0);                       \
376         }                                                 \
377         else                    ap2[_i] = value;          \
378         goto b_noinsert;                                  \
379       }                                                   \
380     }                                                     \
381     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
382     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
383     PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
384     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
385     N = nrow2++ - 1; b->nz++; high2++;                    \
386     /* shift up all the later entries in this row */      \
387     PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
388     PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
389     rp2[_i] = col;                                        \
390     ap2[_i] = value;                                      \
391     B->nonzerostate++;                                    \
392     b_noinsert: ;                                         \
393     bilen[row] = nrow2;                                   \
394   }
395 
396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
397 {
398   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
399   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
400   PetscInt       l,*garray = mat->garray,diag;
401   PetscScalar    *aa,*ba;
402 
403   PetscFunctionBegin;
404   /* code only works for square matrices A */
405 
406   /* find size of row to the left of the diagonal part */
407   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
408   row  = row - diag;
409   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
410     if (garray[b->j[b->i[row]+l]] > diag) break;
411   }
412   if (l) {
413     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
414     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
415     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
416   }
417 
418   /* diagonal part */
419   if (a->i[row+1]-a->i[row]) {
420     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
421     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
422     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
423   }
424 
425   /* right of diagonal part */
426   if (b->i[row+1]-b->i[row]-l) {
427     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
428     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
429     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
435 {
436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
437   PetscScalar    value = 0.0;
438   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
439   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
440   PetscBool      roworiented = aij->roworiented;
441 
442   /* Some Variables required in the macro */
443   Mat        A                    = aij->A;
444   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
445   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
446   PetscBool  ignorezeroentries    = a->ignorezeroentries;
447   Mat        B                    = aij->B;
448   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
449   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
450   MatScalar  *aa,*ba;
451   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
452   PetscInt   nonew;
453   MatScalar  *ap1,*ap2;
454 
455   PetscFunctionBegin;
456   PetscCall(MatSeqAIJGetArray(A,&aa));
457   PetscCall(MatSeqAIJGetArray(B,&ba));
458   for (i=0; i<m; i++) {
459     if (im[i] < 0) continue;
460     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
461     if (im[i] >= rstart && im[i] < rend) {
462       row      = im[i] - rstart;
463       lastcol1 = -1;
464       rp1      = aj + ai[row];
465       ap1      = aa + ai[row];
466       rmax1    = aimax[row];
467       nrow1    = ailen[row];
468       low1     = 0;
469       high1    = nrow1;
470       lastcol2 = -1;
471       rp2      = bj + bi[row];
472       ap2      = ba + bi[row];
473       rmax2    = bimax[row];
474       nrow2    = bilen[row];
475       low2     = 0;
476       high2    = nrow2;
477 
478       for (j=0; j<n; j++) {
479         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
480         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
481         if (in[j] >= cstart && in[j] < cend) {
482           col   = in[j] - cstart;
483           nonew = a->nonew;
484           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
485         } else if (in[j] < 0) {
486           continue;
487         } else {
488           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
489           if (mat->was_assembled) {
490             if (!aij->colmap) {
491               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
492             }
493 #if defined(PETSC_USE_CTABLE)
494             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
495             col--;
496 #else
497             col = aij->colmap[in[j]] - 1;
498 #endif
499             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
500               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
501               col  =  in[j];
502               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
503               B        = aij->B;
504               b        = (Mat_SeqAIJ*)B->data;
505               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
506               rp2      = bj + bi[row];
507               ap2      = ba + bi[row];
508               rmax2    = bimax[row];
509               nrow2    = bilen[row];
510               low2     = 0;
511               high2    = nrow2;
512               bm       = aij->B->rmap->n;
513               ba       = b->a;
514             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
515               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
516                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
517               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
518             }
519           } else col = in[j];
520           nonew = b->nonew;
521           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
522         }
523       }
524     } else {
525       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
526       if (!aij->donotstash) {
527         mat->assembled = PETSC_FALSE;
528         if (roworiented) {
529           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
530         } else {
531           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
532         }
533       }
534     }
535   }
536   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
537   PetscCall(MatSeqAIJRestoreArray(B,&ba));
538   PetscFunctionReturn(0);
539 }
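
/*
   Usage sketch for the routine above (illustrative; grow, gdiag, goff, and the values are placeholders:
   grow is a global row owned by this rank, gdiag a global column inside this rank's column range, goff
   one outside it). Callers always pass global indices; the routine splits them between the diagonal
   block A and the off-diagonal block B, and stashes rows owned by other ranks until assembly:

     PetscInt    row     = grow;
     PetscInt    cols[2] = {gdiag,goff};
     PetscScalar vals[2] = {2.0,-1.0};
     PetscCall(MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES));
*/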
540 
541 /*
542     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
543     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
544     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
545 */
546 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
547 {
548   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
549   Mat            A           = aij->A; /* diagonal part of the matrix */
550   Mat            B           = aij->B; /* offdiagonal part of the matrix */
551   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
552   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
554   PetscInt       *ailen      = a->ilen,*aj = a->j;
555   PetscInt       *bilen      = b->ilen,*bj = b->j;
556   PetscInt       am          = aij->A->rmap->n,j;
557   PetscInt       diag_so_far = 0,dnz;
558   PetscInt       offd_so_far = 0,onz;
559 
560   PetscFunctionBegin;
561   /* Iterate over all rows of the matrix */
562   for (j=0; j<am; j++) {
563     dnz = onz = 0;
564     /*  Iterate over all non-zero columns of the current row */
565     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
566       /* If column is in the diagonal */
567       if (mat_j[col] >= cstart && mat_j[col] < cend) {
568         aj[diag_so_far++] = mat_j[col] - cstart;
569         dnz++;
570       } else { /* off-diagonal entries */
571         bj[offd_so_far++] = mat_j[col];
572         onz++;
573       }
574     }
575     ailen[j] = dnz;
576     bilen[j] = onz;
577   }
578   PetscFunctionReturn(0);
579 }
580 
581 /*
582     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
583     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
584     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
585     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
586     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
587 */
588 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
589 {
590   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
591   Mat            A      = aij->A; /* diagonal part of the matrix */
592   Mat            B      = aij->B; /* offdiagonal part of the matrix */
593   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
594   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
595   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
597   PetscInt       *ailen = a->ilen,*aj = a->j;
598   PetscInt       *bilen = b->ilen,*bj = b->j;
599   PetscInt       am     = aij->A->rmap->n,j;
600   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
601   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
602   PetscScalar    *aa = a->a,*ba = b->a;
603 
604   PetscFunctionBegin;
605   /* Iterate over all rows of the matrix */
606   for (j=0; j<am; j++) {
607     dnz_row = onz_row = 0;
608     rowstart_offd = full_offd_i[j];
609     rowstart_diag = full_diag_i[j];
610     /*  Iterate over all non-zero columns of the current row */
611     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
612       /* If column is in the diagonal */
613       if (mat_j[col] >= cstart && mat_j[col] < cend) {
614         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
615         aa[rowstart_diag+dnz_row] = mat_a[col];
616         dnz_row++;
617       } else { /* off-diagonal entries */
618         bj[rowstart_offd+onz_row] = mat_j[col];
619         ba[rowstart_offd+onz_row] = mat_a[col];
620         onz_row++;
621       }
622     }
623     ailen[j] = dnz_row;
624     bilen[j] = onz_row;
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* negative row */
638     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* negative column */
643         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
647         } else {
648           if (!aij->colmap) {
649             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
650           }
651 #if defined(PETSC_USE_CTABLE)
652           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   PetscInt       nstash,reallocs;
672 
673   PetscFunctionBegin;
674   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
677   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
678   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
679   PetscFunctionReturn(0);
680 }
681 
682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscMPIInt    n;
686   PetscInt       i,j,rstart,ncols,flg;
687   PetscInt       *row,*col;
688   PetscBool      other_disassembled;
689   PetscScalar    *val;
690 
691   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
692 
693   PetscFunctionBegin;
694   if (!aij->donotstash && !mat->nooffprocentries) {
695     while (1) {
696       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
697       if (!flg) break;
698 
699       for (i=0; i<n;) {
700         /* Now identify the consecutive vals belonging to the same row */
701         for (j=i,rstart=row[j]; j<n; j++) {
702           if (row[j] != rstart) break;
703         }
704         if (j < n) ncols = j-i;
705         else       ncols = n-i;
706         /* Now assemble all these values with a single function call */
707         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
708         i    = j;
709       }
710     }
711     PetscCall(MatStashScatterEnd_Private(&mat->stash));
712   }
713 #if defined(PETSC_HAVE_DEVICE)
714   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
715   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
716   if (mat->boundtocpu) {
717     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
718     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
719   }
720 #endif
721   PetscCall(MatAssemblyBegin(aij->A,mode));
722   PetscCall(MatAssemblyEnd(aij->A,mode));
723 
724   /* determine whether any process has disassembled; if so, we must
725      also disassemble ourselves, in order that we may reassemble. */
726   /*
727      if nonzero structure of submatrix B cannot change then we know that
728      no processor disassembled thus we can skip this stuff
729   */
730   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
731     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
732     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
733       PetscCall(MatDisAssemble_MPIAIJ(mat));
734     }
735   }
736   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
737     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
738   }
739   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
740 #if defined(PETSC_HAVE_DEVICE)
741   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
742 #endif
743   PetscCall(MatAssemblyBegin(aij->B,mode));
744   PetscCall(MatAssemblyEnd(aij->B,mode));
745 
746   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
747 
748   aij->rowvalues = NULL;
749 
750   PetscCall(VecDestroy(&aij->diag));
751 
752   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
753   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
754     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
755     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
756   }
757 #if defined(PETSC_HAVE_DEVICE)
758   mat->offloadmask = PETSC_OFFLOAD_BOTH;
759 #endif
760   PetscFunctionReturn(0);
761 }
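
/*
   Assembly sketch (illustrative): the stash filled by MatSetValues_MPIAIJ() for off-process rows is
   scattered and drained by the pair of routines above, so a complete set-values phase always ends with

     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));

   Use MAT_FLUSH_ASSEMBLY instead when switching between INSERT_VALUES and ADD_VALUES before the
   final assembly.
*/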
762 
763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
764 {
765   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
766 
767   PetscFunctionBegin;
768   PetscCall(MatZeroEntries(l->A));
769   PetscCall(MatZeroEntries(l->B));
770   PetscFunctionReturn(0);
771 }
772 
773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
774 {
775   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
776   PetscObjectState sA, sB;
777   PetscInt        *lrows;
778   PetscInt         r, len;
779   PetscBool        cong, lch, gch;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
784   PetscCall(MatHasCongruentLayouts(A,&cong));
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     PetscCall(VecGetArrayRead(x, &xx));
792     PetscCall(VecGetArray(b, &bb));
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     PetscCall(VecRestoreArrayRead(x, &xx));
795     PetscCall(VecRestoreArray(b, &bb));
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
803     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
804   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
824     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
834     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
835   }
836   PetscCall(PetscFree(lrows));
837   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
838   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
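
/*
   Usage sketch (illustrative; nrows, rows, x, and b are placeholders): zero a set of global rows,
   put 1.0 on their diagonal, and update the right-hand side b from the vector of prescribed values x:

     PetscCall(MatZeroRows(A,nrows,rows,1.0,x,b));

   Passing NULL for x and b skips the right-hand-side fix-up, as in the internal calls above.
*/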
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscMPIInt       n = A->rmap->n;
851   PetscInt          i,j,r,m,len = 0;
852   PetscInt          *lrows,*owners = A->rmap->range;
853   PetscMPIInt       p = 0;
854   PetscSFNode       *rrows;
855   PetscSF           sf;
856   const PetscScalar *xx;
857   PetscScalar       *bb,*mask,*aij_a;
858   Vec               xmask,lmask;
859   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
860   const PetscInt    *aj, *ii,*ridx;
861   PetscScalar       *aa;
862 
863   PetscFunctionBegin;
864   /* Create SF where leaves are input rows and roots are owned rows */
865   PetscCall(PetscMalloc1(n, &lrows));
866   for (r = 0; r < n; ++r) lrows[r] = -1;
867   PetscCall(PetscMalloc1(N, &rrows));
868   for (r = 0; r < N; ++r) {
869     const PetscInt idx   = rows[r];
870     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
871     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
872       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
873     }
874     rrows[r].rank  = p;
875     rrows[r].index = rows[r] - owners[p];
876   }
877   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
878   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
879   /* Collect flags for rows to be zeroed */
880   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
881   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
882   PetscCall(PetscSFDestroy(&sf));
883   /* Compress and put in row numbers */
884   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
885   /* zero diagonal part of matrix */
886   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
887   /* handle off diagonal part of matrix */
888   PetscCall(MatCreateVecs(A,&xmask,NULL));
889   PetscCall(VecDuplicate(l->lvec,&lmask));
890   PetscCall(VecGetArray(xmask,&bb));
891   for (i=0; i<len; i++) bb[lrows[i]] = 1;
892   PetscCall(VecRestoreArray(xmask,&bb));
893   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
894   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
895   PetscCall(VecDestroy(&xmask));
896   if (x && b) { /* this code is buggy when the row and column layouts don't match */
897     PetscBool cong;
898 
899     PetscCall(MatHasCongruentLayouts(A,&cong));
900     PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
901     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
902     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
903     PetscCall(VecGetArrayRead(l->lvec,&xx));
904     PetscCall(VecGetArray(b,&bb));
905   }
906   PetscCall(VecGetArray(lmask,&mask));
907   /* remove zeroed rows of off diagonal matrix */
908   PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
909   ii = aij->i;
910   for (i=0; i<len; i++) {
911     PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
912   }
913   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
914   if (aij->compressedrow.use) {
915     m    = aij->compressedrow.nrows;
916     ii   = aij->compressedrow.i;
917     ridx = aij->compressedrow.rindex;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij_a + ii[i];
922 
923       for (j=0; j<n; j++) {
924         if (PetscAbsScalar(mask[*aj])) {
925           if (b) bb[*ridx] -= *aa*xx[*aj];
926           *aa = 0.0;
927         }
928         aa++;
929         aj++;
930       }
931       ridx++;
932     }
933   } else { /* do not use compressed row format */
934     m = l->B->rmap->n;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij_a + ii[i];
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[i] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947     }
948   }
949   if (x && b) {
950     PetscCall(VecRestoreArray(b,&bb));
951     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
952   }
953   PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
954   PetscCall(VecRestoreArray(lmask,&mask));
955   PetscCall(VecDestroy(&lmask));
956   PetscCall(PetscFree(lrows));
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscInt       nt;
970   VecScatter     Mvctx = a->Mvctx;
971 
972   PetscFunctionBegin;
973   PetscCall(VecGetLocalSize(xx,&nt));
974   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
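  /* Overlap communication with computation: start gathering the needed off-process entries of xx
     into a->lvec, multiply by the diagonal block while the messages are in flight, complete the
     scatter, then add the off-diagonal block contribution. */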
975   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
976   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
977   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
978   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985 
986   PetscFunctionBegin;
987   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
992 {
993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
994   VecScatter     Mvctx = a->Mvctx;
995 
996   PetscFunctionBegin;
997   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
998   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
999   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1000   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007 
1008   PetscFunctionBegin;
1009   /* do nondiagonal part */
1010   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1011   /* do local part */
1012   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1013   /* add partial results together */
1014   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1015   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1020 {
1021   MPI_Comm       comm;
1022   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1023   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1024   IS             Me,Notme;
1025   PetscInt       M,N,first,last,*notme,i;
1026   PetscBool      lf;
1027   PetscMPIInt    size;
1028 
1029   PetscFunctionBegin;
1030   /* Easy test: symmetric diagonal block */
1031   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1032   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
1033   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1034   if (!*f) PetscFunctionReturn(0);
1035   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1036   PetscCallMPI(MPI_Comm_size(comm,&size));
1037   if (size == 1) PetscFunctionReturn(0);
1038 
1039   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1040   PetscCall(MatGetSize(Amat,&M,&N));
1041   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
1042   PetscCall(PetscMalloc1(N-last+first,&notme));
1043   for (i=0; i<first; i++) notme[i] = i;
1044   for (i=last; i<M; i++) notme[i-last+first] = i;
1045   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
1046   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
1047   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
1048   Aoff = Aoffs[0];
1049   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
1050   Boff = Boffs[0];
1051   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
1052   PetscCall(MatDestroyMatrices(1,&Aoffs));
1053   PetscCall(MatDestroyMatrices(1,&Boffs));
1054   PetscCall(ISDestroy(&Me));
1055   PetscCall(ISDestroy(&Notme));
1056   PetscCall(PetscFree(notme));
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1061 {
1062   PetscFunctionBegin;
1063   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070 
1071   PetscFunctionBegin;
1072   /* do nondiagonal part */
1073   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1074   /* do local part */
1075   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1076   /* add partial results together */
1077   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1078   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089 
1090   PetscFunctionBegin;
1091   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1092   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1093   PetscCall(MatGetDiagonal(a->A,v));
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100 
1101   PetscFunctionBegin;
1102   PetscCall(MatScale(a->A,aa));
1103   PetscCall(MatScale(a->B,aa));
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1109 {
1110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1111 
1112   PetscFunctionBegin;
1113   PetscCall(PetscSFDestroy(&aij->coo_sf));
1114   PetscCall(PetscFree(aij->Aperm1));
1115   PetscCall(PetscFree(aij->Bperm1));
1116   PetscCall(PetscFree(aij->Ajmap1));
1117   PetscCall(PetscFree(aij->Bjmap1));
1118 
1119   PetscCall(PetscFree(aij->Aimap2));
1120   PetscCall(PetscFree(aij->Bimap2));
1121   PetscCall(PetscFree(aij->Aperm2));
1122   PetscCall(PetscFree(aij->Bperm2));
1123   PetscCall(PetscFree(aij->Ajmap2));
1124   PetscCall(PetscFree(aij->Bjmap2));
1125 
1126   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1127   PetscCall(PetscFree(aij->Cperm1));
1128   PetscFunctionReturn(0);
1129 }
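
/*
   COO assembly sketch (illustrative; ncoo, coo_i, coo_j, and coo_v are placeholders): the support
   arrays freed above are built by MatSetPreallocationCOO() and then reused by every call to
   MatSetValuesCOO():

     PetscCall(MatSetPreallocationCOO(mat,ncoo,coo_i,coo_j));  // global indices; repeated entries are combined
     PetscCall(MatSetValuesCOO(mat,coo_v,ADD_VALUES));         // may be called repeatedly with new values
*/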
1130 
1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1132 {
1133   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1134 
1135   PetscFunctionBegin;
1136 #if defined(PETSC_USE_LOG)
1137   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1138 #endif
1139   PetscCall(MatStashDestroy_Private(&mat->stash));
1140   PetscCall(VecDestroy(&aij->diag));
1141   PetscCall(MatDestroy(&aij->A));
1142   PetscCall(MatDestroy(&aij->B));
1143 #if defined(PETSC_USE_CTABLE)
1144   PetscCall(PetscTableDestroy(&aij->colmap));
1145 #else
1146   PetscCall(PetscFree(aij->colmap));
1147 #endif
1148   PetscCall(PetscFree(aij->garray));
1149   PetscCall(VecDestroy(&aij->lvec));
1150   PetscCall(VecScatterDestroy(&aij->Mvctx));
1151   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1152   PetscCall(PetscFree(aij->ld));
1153 
1154   /* Free COO */
1155   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1156 
1157   PetscCall(PetscFree(mat->data));
1158 
1159   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1160   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1161 
1162   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1172 #if defined(PETSC_HAVE_CUDA)
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1174 #endif
1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1177 #endif
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1181 #endif
1182 #if defined(PETSC_HAVE_SCALAPACK)
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1184 #endif
1185 #if defined(PETSC_HAVE_HYPRE)
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1188 #endif
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1195 #if defined(PETSC_HAVE_MKL_SPARSE)
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1197 #endif
1198   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1202   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1209   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1210   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa,*ba;
1213   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1214   PetscInt          *rowlens;
1215   PetscInt          *colidxs;
1216   PetscScalar       *matvals;
1217 
1218   PetscFunctionBegin;
1219   PetscCall(PetscViewerSetUp(viewer));
1220 
1221   M  = mat->rmap->N;
1222   N  = mat->cmap->N;
1223   m  = mat->rmap->n;
1224   rs = mat->rmap->rstart;
1225   cs = mat->cmap->rstart;
1226   nz = A->nz + B->nz;
1227 
1228   /* write matrix header */
1229   header[0] = MAT_FILE_CLASSID;
1230   header[1] = M; header[2] = N; header[3] = nz;
1231   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1232   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1233 
1234   /* fill in and store row lengths  */
1235   PetscCall(PetscMalloc1(m,&rowlens));
1236   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1237   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1238   PetscCall(PetscFree(rowlens));
1239 
1240   /* fill in and store column indices */
1241   PetscCall(PetscMalloc1(nz,&colidxs));
1242   for (cnt=0, i=0; i<m; i++) {
1243     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       colidxs[cnt++] = garray[B->j[jb]];
1246     }
1247     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1248       colidxs[cnt++] = A->j[ja] + cs;
1249     for (; jb<B->i[i+1]; jb++)
1250       colidxs[cnt++] = garray[B->j[jb]];
1251   }
1252   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1254   PetscCall(PetscFree(colidxs));
1255 
1256   /* fill in and store nonzero values */
1257   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1258   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1259   PetscCall(PetscMalloc1(nz,&matvals));
1260   for (cnt=0, i=0; i<m; i++) {
1261     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1262       if (garray[B->j[jb]] > cs) break;
1263       matvals[cnt++] = ba[jb];
1264     }
1265     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1266       matvals[cnt++] = aa[ja];
1267     for (; jb<B->i[i+1]; jb++)
1268       matvals[cnt++] = ba[jb];
1269   }
1270   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1272   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1273   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1274   PetscCall(PetscFree(matvals));
1275 
1276   /* write block size option to the viewer's .info file */
1277   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1278   PetscFunctionReturn(0);
1279 }
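
/*
   Usage sketch (illustrative): the binary writer above is reached through the generic viewer
   interface, e.g.

     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)mat),"mat.dat",FILE_MODE_WRITE,&viewer));
     PetscCall(MatView(mat,viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/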
1280 
1281 #include <petscdraw.h>
1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1283 {
1284   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1285   PetscMPIInt       rank = aij->rank,size = aij->size;
1286   PetscBool         isdraw,iascii,isbinary;
1287   PetscViewer       sviewer;
1288   PetscViewerFormat format;
1289 
1290   PetscFunctionBegin;
1291   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1292   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1293   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1294   if (iascii) {
1295     PetscCall(PetscViewerGetFormat(viewer,&format));
1296     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1297       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1298       PetscCall(PetscMalloc1(size,&nz));
1299       PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
1300       for (i=0; i<(PetscInt)size; i++) {
1301         nmax = PetscMax(nmax,nz[i]);
1302         nmin = PetscMin(nmin,nz[i]);
1303         navg += nz[i];
1304       }
1305       PetscCall(PetscFree(nz));
1306       navg = navg/size;
1307       PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
1308       PetscFunctionReturn(0);
1309     }
1310     PetscCall(PetscViewerGetFormat(viewer,&format));
1311     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1312       MatInfo   info;
1313       PetscInt *inodes=NULL;
1314 
1315       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
1316       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
1317       PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
1318       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1319       if (!inodes) {
1320         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1321                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1322       } else {
1323         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1324                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1325       }
1326       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
1327       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1328       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
1329       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
1330       PetscCall(PetscViewerFlush(viewer));
1331       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1332       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
1333       PetscCall(VecScatterView(aij->Mvctx,viewer));
1334       PetscFunctionReturn(0);
1335     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1336       PetscInt inodecount,inodelimit,*inodes;
1337       PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
1338       if (inodes) {
1339         PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
1340       } else {
1341         PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
1342       }
1343       PetscFunctionReturn(0);
1344     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1345       PetscFunctionReturn(0);
1346     }
1347   } else if (isbinary) {
1348     if (size == 1) {
1349       PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1350       PetscCall(MatView(aij->A,viewer));
1351     } else {
1352       PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
1353     }
1354     PetscFunctionReturn(0);
1355   } else if (iascii && size == 1) {
1356     PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
1357     PetscCall(MatView(aij->A,viewer));
1358     PetscFunctionReturn(0);
1359   } else if (isdraw) {
1360     PetscDraw draw;
1361     PetscBool isnull;
1362     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1363     PetscCall(PetscDrawIsNull(draw,&isnull));
1364     if (isnull) PetscFunctionReturn(0);
1365   }
1366 
1367   { /* assemble the entire matrix onto first processor */
1368     Mat A = NULL, Av;
1369     IS  isrow,iscol;
1370 
1371     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1372     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1373     PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
1374     PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
1375 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1376 /*
1377     Mat *AA, A = NULL, Av;
1378     IS  isrow,iscol;
1379 
1380     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1381     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1382     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1383     if (rank == 0) {
1384        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1385        A    = AA[0];
1386        Av   = AA[0];
1387     }
1388     PetscCall(MatDestroySubMatrices(1,&AA));
1389 */
1390     PetscCall(ISDestroy(&iscol));
1391     PetscCall(ISDestroy(&isrow));
1392     /*
1393        Every process has to participate in this call since the graphics waits are
1394        synchronized across all processes that share the PetscDraw object
1395     */
1396     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1397     if (rank == 0) {
1398       if (((PetscObject)mat)->name) {
1399         PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
1400       }
1401       PetscCall(MatView_SeqAIJ(Av,sviewer));
1402     }
1403     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1404     PetscCall(PetscViewerFlush(viewer));
1405     PetscCall(MatDestroy(&A));
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1411 {
1412   PetscBool      iascii,isdraw,issocket,isbinary;
1413 
1414   PetscFunctionBegin;
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1419   if (iascii || isdraw || isbinary || issocket) {
1420     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1426 {
1427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1428   Vec            bb1 = NULL;
1429   PetscBool      hasop;
1430 
1431   PetscFunctionBegin;
1432   if (flag == SOR_APPLY_UPPER) {
1433     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1434     PetscFunctionReturn(0);
1435   }
1436 
1437   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1438     PetscCall(VecDuplicate(bb,&bb1));
1439   }
1440 
1441   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1444       its--;
1445     }
1446 
1447     while (its--) {
1448       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1449       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       PetscCall(VecScale(mat->lvec,-1.0));
1453       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1454 
1455       /* local sweep */
1456       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
1457     }
1458   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1461       its--;
1462     }
1463     while (its--) {
1464       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1465       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       PetscCall(VecScale(mat->lvec,-1.0));
1469       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1470 
1471       /* local sweep */
1472       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
1473     }
1474   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
1477       its--;
1478     }
1479     while (its--) {
1480       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1481       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1482 
1483       /* update rhs: bb1 = bb - B*x */
1484       PetscCall(VecScale(mat->lvec,-1.0));
1485       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
1486 
1487       /* local sweep */
1488       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
1489     }
1490   } else if (flag & SOR_EISENSTAT) {
1491     Vec xx1;
1492 
1493     PetscCall(VecDuplicate(bb,&xx1));
1494     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
1495 
1496     PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1497     PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
1498     if (!mat->diag) {
1499       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
1500       PetscCall(MatGetDiagonal(matin,mat->diag));
1501     }
1502     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
1503     if (hasop) {
1504       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
1505     } else {
1506       PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
1507     }
1508     PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));
1509 
1510     PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));
1511 
1512     /* local sweep */
1513     PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
1514     PetscCall(VecAXPY(xx,1.0,xx1));
1515     PetscCall(VecDestroy(&xx1));
1516   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1517 
1518   PetscCall(VecDestroy(&bb1));
1519 
1520   matin->factorerrortype = mat->A->factorerrortype;
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1525 {
1526   Mat            aA,aB,Aperm;
1527   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1528   PetscScalar    *aa,*ba;
1529   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1530   PetscSF        rowsf,sf;
1531   IS             parcolp = NULL;
1532   PetscBool      done;
1533 
1534   PetscFunctionBegin;
1535   PetscCall(MatGetLocalSize(A,&m,&n));
1536   PetscCall(ISGetIndices(rowp,&rwant));
1537   PetscCall(ISGetIndices(colp,&cwant));
1538   PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));
1539 
1540   /* Invert row permutation to find out where my rows should go */
1541   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
1542   PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
1543   PetscCall(PetscSFSetFromOptions(rowsf));
1544   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1545   PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
1546   PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
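  /* rdest[i] now holds the global row of the permuted matrix to which local row i of A will be sent */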
1547 
1548   /* Invert column permutation to find out where my columns should go */
1549   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1550   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
1551   PetscCall(PetscSFSetFromOptions(sf));
1552   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1553   PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1554   PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
1555   PetscCall(PetscSFDestroy(&sf));
1556 
1557   PetscCall(ISRestoreIndices(rowp,&rwant));
1558   PetscCall(ISRestoreIndices(colp,&cwant));
1559   PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));
1560 
1561   /* Find out where my gcols should go */
1562   PetscCall(MatGetSize(aB,NULL,&ng));
1563   PetscCall(PetscMalloc1(ng,&gcdest));
1564   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1565   PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
1566   PetscCall(PetscSFSetFromOptions(sf));
1567   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&sf));
1570 
1571   PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
1572   PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1573   PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1574   for (i=0; i<m; i++) {
1575     PetscInt    row = rdest[i];
1576     PetscMPIInt rowner;
1577     PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
1578     for (j=ai[i]; j<ai[i+1]; j++) {
1579       PetscInt    col = cdest[aj[j]];
1580       PetscMPIInt cowner;
1581       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
1582       if (rowner == cowner) dnnz[i]++;
1583       else onnz[i]++;
1584     }
1585     for (j=bi[i]; j<bi[i+1]; j++) {
1586       PetscInt    col = gcdest[bj[j]];
1587       PetscMPIInt cowner;
1588       PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
1589       if (rowner == cowner) dnnz[i]++;
1590       else onnz[i]++;
1591     }
1592   }
1593   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1594   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
1595   PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1596   PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
1597   PetscCall(PetscSFDestroy(&rowsf));
1598 
1599   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
1600   PetscCall(MatSeqAIJGetArray(aA,&aa));
1601   PetscCall(MatSeqAIJGetArray(aB,&ba));
1602   for (i=0; i<m; i++) {
1603     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1604     PetscInt j0,rowlen;
1605     rowlen = ai[i+1] - ai[i];
1606     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1607       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1608       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
1609     }
1610     rowlen = bi[i+1] - bi[i];
1611     for (j0=j=0; j<rowlen; j0=j) {
1612       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1613       PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
1614     }
1615   }
1616   PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
1617   PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
1618   PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
1619   PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
1620   PetscCall(MatSeqAIJRestoreArray(aA,&aa));
1621   PetscCall(MatSeqAIJRestoreArray(aB,&ba));
1622   PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
1623   PetscCall(PetscFree3(work,rdest,cdest));
1624   PetscCall(PetscFree(gcdest));
1625   if (parcolp) PetscCall(ISDestroy(&colp));
1626   *B = Aperm;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1631 {
1632   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1633 
1634   PetscFunctionBegin;
1635   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1636   if (ghosts) *ghosts = aij->garray;
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1641 {
1642   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1643   Mat            A    = mat->A,B = mat->B;
1644   PetscLogDouble isend[5],irecv[5];
1645 
1646   PetscFunctionBegin;
1647   info->block_size = 1.0;
1648   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1649 
1650   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1651   isend[3] = info->memory;  isend[4] = info->mallocs;
1652 
1653   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1654 
1655   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;  isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687 {
1688   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1689 
1690   PetscFunctionBegin;
1691   switch (op) {
1692   case MAT_NEW_NONZERO_LOCATIONS:
1693   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695   case MAT_KEEP_NONZERO_PATTERN:
1696   case MAT_NEW_NONZERO_LOCATION_ERR:
1697   case MAT_USE_INODES:
1698   case MAT_IGNORE_ZERO_ENTRIES:
1699   case MAT_FORM_EXPLICIT_TRANSPOSE:
1700     MatCheckPreallocated(A,1);
1701     PetscCall(MatSetOption(a->A,op,flg));
1702     PetscCall(MatSetOption(a->B,op,flg));
1703     break;
1704   case MAT_ROW_ORIENTED:
1705     MatCheckPreallocated(A,1);
1706     a->roworiented = flg;
1707 
1708     PetscCall(MatSetOption(a->A,op,flg));
1709     PetscCall(MatSetOption(a->B,op,flg));
1710     break;
1711   case MAT_FORCE_DIAGONAL_ENTRIES:
1712   case MAT_SORTED_FULL:
1713     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1714     break;
1715   case MAT_IGNORE_OFF_PROC_ENTRIES:
1716     a->donotstash = flg;
1717     break;
1718   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1719   case MAT_SPD:
1720   case MAT_SYMMETRIC:
1721   case MAT_STRUCTURALLY_SYMMETRIC:
1722   case MAT_HERMITIAN:
1723   case MAT_SYMMETRY_ETERNAL:
1724     break;
1725   case MAT_SUBMAT_SINGLEIS:
1726     A->submat_singleis = flg;
1727     break;
1728   case MAT_STRUCTURE_ONLY:
1729     /* The option is handled directly by MatSetOption() */
1730     break;
1731   default:
1732     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1733   }
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1738 {
1739   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1740   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1741   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1742   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1743   PetscInt       *cmap,*idx_p;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1747   mat->getrowactive = PETSC_TRUE;
1748 
1749   if (!mat->rowvalues && (idx || v)) {
1750     /*
1751         allocate enough space to hold information from the longest row.
1752     */
1753     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1754     PetscInt   max = 1,tmp;
1755     for (i=0; i<matin->rmap->n; i++) {
1756       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1757       if (max < tmp) max = tmp;
1758     }
1759     PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
1760   }
1761 
1762   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1763   lrow = row - rstart;
1764 
1765   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1766   if (!v)   {pvA = NULL; pvB = NULL;}
1767   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1768   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1769   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1770   nztot = nzA + nzB;
1771 
1772   cmap = mat->garray;
1773   if (v  || idx) {
1774     if (nztot) {
1775       /* Sort by increasing column numbers, assuming A and B already sorted */
1776       PetscInt imark = -1;
1777       if (v) {
1778         *v = v_p = mat->rowvalues;
1779         for (i=0; i<nzB; i++) {
1780           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1781           else break;
1782         }
1783         imark = i;
1784         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1785         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1786       }
1787       if (idx) {
1788         *idx = idx_p = mat->rowindices;
1789         if (imark > -1) {
1790           for (i=0; i<imark; i++) {
1791             idx_p[i] = cmap[cworkB[i]];
1792           }
1793         } else {
1794           for (i=0; i<nzB; i++) {
1795             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1796             else break;
1797           }
1798           imark = i;
1799         }
1800         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1801         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1802       }
1803     } else {
1804       if (idx) *idx = NULL;
1805       if (v)   *v   = NULL;
1806     }
1807   }
1808   *nz  = nztot;
1809   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1810   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1825 {
1826   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1827   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1828   PetscInt        i,j,cstart = mat->cmap->rstart;
1829   PetscReal       sum = 0.0;
1830   const MatScalar *v,*amata,*bmata;
1831 
1832   PetscFunctionBegin;
1833   if (aij->size == 1) {
1834     PetscCall(MatNorm(aij->A,type,norm));
1835   } else {
1836     PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
1837     PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
1838     if (type == NORM_FROBENIUS) {
1839       v = amata;
1840       for (i=0; i<amat->nz; i++) {
1841         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1842       }
1843       v = bmata;
1844       for (i=0; i<bmat->nz; i++) {
1845         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1846       }
1847       PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1848       *norm = PetscSqrtReal(*norm);
1849       PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
1850     } else if (type == NORM_1) { /* max column norm */
1851       PetscReal *tmp,*tmp2;
1852       PetscInt  *jj,*garray = aij->garray;
1853       PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
1854       PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
1855       *norm = 0.0;
1856       v     = amata; jj = amat->j;
1857       for (j=0; j<amat->nz; j++) {
1858         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1859       }
1860       v = bmata; jj = bmat->j;
1861       for (j=0; j<bmat->nz; j++) {
1862         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1863       }
1864       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
1865       for (j=0; j<mat->cmap->N; j++) {
1866         if (tmp2[j] > *norm) *norm = tmp2[j];
1867       }
1868       PetscCall(PetscFree(tmp));
1869       PetscCall(PetscFree(tmp2));
1870       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1871     } else if (type == NORM_INFINITY) { /* max row norm */
1872       PetscReal ntemp = 0.0;
1873       for (j=0; j<aij->A->rmap->n; j++) {
1874         v   = amata + amat->i[j];
1875         sum = 0.0;
1876         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1877           sum += PetscAbsScalar(*v); v++;
1878         }
1879         v = bmata + bmat->i[j];
1880         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v); v++;
1882         }
1883         if (sum > ntemp) ntemp = sum;
1884       }
1885       PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
1886       PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
1887     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1888     PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
1889     PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1895 {
1896   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1897   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1898   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1899   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1900   Mat             B,A_diag,*B_diag;
1901   const MatScalar *pbv,*bv;
1902 
1903   PetscFunctionBegin;
1904   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1905   ai = Aloc->i; aj = Aloc->j;
1906   bi = Bloc->i; bj = Bloc->j;
1907   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1908     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1909     PetscSFNode          *oloc;
1910     PETSC_UNUSED PetscSF sf;
1911 
1912     PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
1913     /* compute d_nnz for preallocation */
1914     PetscCall(PetscArrayzero(d_nnz,na));
1915     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1916     /* compute local off-diagonal contributions */
1917     PetscCall(PetscArrayzero(g_nnz,nb));
1918     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1919     /* map those to global */
1920     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
1921     PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
1922     PetscCall(PetscSFSetFromOptions(sf));
1923     PetscCall(PetscArrayzero(o_nnz,na));
1924     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1925     PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
1926     PetscCall(PetscSFDestroy(&sf));
1927 
1928     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1929     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1930     PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1931     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1932     PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
1933     PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
1934   } else {
1935     B    = *matout;
1936     PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
1937   }
1938 
1939   b           = (Mat_MPIAIJ*)B->data;
1940   A_diag      = a->A;
1941   B_diag      = &b->A;
1942   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1943   A_diag_ncol = A_diag->cmap->N;
1944   B_diag_ilen = sub_B_diag->ilen;
1945   B_diag_i    = sub_B_diag->i;
1946 
1947   /* Set ilen for diagonal of B */
1948   for (i=0; i<A_diag_ncol; i++) {
1949     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1950   }
1951 
1952   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1953   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1954   PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));
1955 
1956   /* copy over the B part */
1957   PetscCall(PetscMalloc1(bi[mb],&cols));
1958   PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
1959   pbv  = bv;
1960   row  = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
1966     row++;
1967     pbv += ncol; cols_tmp += ncol;
1968   }
1969   PetscCall(PetscFree(cols));
1970   PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));
1971 
1972   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1973   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1974   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1975     *matout = B;
1976   } else {
1977     PetscCall(MatHeaderMerge(A,&B));
1978   }
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1983 {
1984   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1985   Mat            a    = aij->A,b = aij->B;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1990   if (rr) {
1991     PetscCall(VecGetLocalSize(rr,&s1));
1992     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1995   }
1996   if (ll) {
1997     PetscCall(VecGetLocalSize(ll,&s1));
1998     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2000   }
2001   /* scale  the diagonal block */
2002   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2007     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015 
2016   PetscFunctionBegin;
2017   PetscCall(MatSetUnfactored(a->A));
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2022 {
2023   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2024   Mat            a,b,c,d;
2025   PetscBool      flg;
2026 
2027   PetscFunctionBegin;
2028   a = matA->A; b = matA->B;
2029   c = matB->A; d = matB->B;
2030 
2031   PetscCall(MatEqual(a,c,&flg));
2032   if (flg) {
2033     PetscCall(MatEqual(b,d,&flg));
2034   }
2035   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2040 {
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2043 
2044   PetscFunctionBegin;
2045   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2046   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2047     /* because of the column compression in the off-processor part of the matrix a->B,
2048        the number of columns in a->B and b->B may be different, hence we cannot call
2049        the MatCopy() directly on the two parts. If need be, we can provide a more
2050        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2051        then copying the submatrices */
2052     PetscCall(MatCopy_Basic(A,B,str));
2053   } else {
2054     PetscCall(MatCopy(a->A,b->A,str));
2055     PetscCall(MatCopy(a->B,b->B,str));
2056   }
2057   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2062 {
2063   PetscFunctionBegin;
2064   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
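/* For example, if row i of X has global columns {0,3,7} and row i of Y has {3,5}, the merged
   pattern is {0,3,5,7}, so nnz[i] = 4 (the shared column 3 is counted only once). */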
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscInt       m = Y->rmap->N;
2097   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2098   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2099 
2100   PetscFunctionBegin;
2101   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2106 {
2107   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2108 
2109   PetscFunctionBegin;
2110   if (str == SAME_NONZERO_PATTERN) {
2111     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2112     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2113   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2114     PetscCall(MatAXPY_Basic(Y,a,X,str));
2115   } else {
2116     Mat      B;
2117     PetscInt *nnz_d,*nnz_o;
2118 
2119     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2120     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2121     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2122     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2123     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2124     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2125     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2126     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2127     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2128     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2129     PetscCall(MatHeaderMerge(Y,&B));
2130     PetscCall(PetscFree(nnz_d));
2131     PetscCall(PetscFree(nnz_o));
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137 
2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139 {
2140   PetscFunctionBegin;
2141   if (PetscDefined(USE_COMPLEX)) {
2142     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2143 
2144     PetscCall(MatConjugate_SeqAIJ(aij->A));
2145     PetscCall(MatConjugate_SeqAIJ(aij->B));
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatRealPart(a->A));
2156   PetscCall(MatRealPart(a->B));
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161 {
2162   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(MatImaginaryPart(a->A));
2166   PetscCall(MatImaginaryPart(a->B));
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2173   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2174   PetscScalar       *va,*vv;
2175   Vec               vB,vA;
2176   const PetscScalar *vb;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2180   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2181 
2182   PetscCall(VecGetArrayWrite(vA,&va));
2183   if (idx) {
2184     for (i=0; i<m; i++) {
2185       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186     }
2187   }
2188 
2189   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2190   PetscCall(PetscMalloc1(m,&idxb));
2191   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2192 
2193   PetscCall(VecGetArrayWrite(v,&vv));
2194   PetscCall(VecGetArrayRead(vB,&vb));
2195   for (i=0; i<m; i++) {
2196     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197       vv[i] = vb[i];
2198       if (idx) idx[i] = a->garray[idxb[i]];
2199     } else {
2200       vv[i] = va[i];
2201       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2202         idx[i] = a->garray[idxb[i]];
2203     }
2204   }
2205   PetscCall(VecRestoreArrayWrite(v,&vv));
2206   PetscCall(VecRestoreArrayWrite(vA,&va));
2207   PetscCall(VecRestoreArrayRead(vB,&vb));
2208   PetscCall(PetscFree(idxb));
2209   PetscCall(VecDestroy(&vA));
2210   PetscCall(VecDestroy(&vB));
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2215 {
2216   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2217   PetscInt          m = A->rmap->n,n = A->cmap->n;
2218   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2219   PetscInt          *cmap  = mat->garray;
2220   PetscInt          *diagIdx, *offdiagIdx;
2221   Vec               diagV, offdiagV;
2222   PetscScalar       *a, *diagA, *offdiagA;
2223   const PetscScalar *ba,*bav;
2224   PetscInt          r,j,col,ncols,*bi,*bj;
2225   Mat               B = mat->B;
2226   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2227 
2228   PetscFunctionBegin;
2229   /* When one process holds the entire matrix A and the other processes have no entries */
2230   if (A->cmap->N == n) {
2231     PetscCall(VecGetArrayWrite(v,&diagA));
2232     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2233     PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
2234     PetscCall(VecDestroy(&diagV));
2235     PetscCall(VecRestoreArrayWrite(v,&diagA));
2236     PetscFunctionReturn(0);
2237   } else if (n == 0) {
2238     if (m) {
2239       PetscCall(VecGetArrayWrite(v,&a));
2240       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2241       PetscCall(VecRestoreArrayWrite(v,&a));
2242     }
2243     PetscFunctionReturn(0);
2244   }
2245 
2246   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2249   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2250 
2251   /* Get offdiagIdx[] for implicit 0.0 */
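  /* B stores only the off-diagonal columns that contain at least one entry (compressed through cmap),
     so a row with fewer than A->cmap->N - n stored entries has at least one implicit 0.0 outside the
     diagonal block; the scan below records the global column of the first such implicit zero */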
2252   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2253   ba   = bav;
2254   bi   = b->i;
2255   bj   = b->j;
2256   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2257   for (r = 0; r < m; r++) {
2258     ncols = bi[r+1] - bi[r];
2259     if (ncols == A->cmap->N - n) { /* Brow is dense */
2260       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2261     } else { /* Brow is sparse, so we already know the minimum in absolute value is the implicit 0.0 */
2262       offdiagA[r] = 0.0;
2263 
2264       /* Find first hole in the cmap */
2265       for (j=0; j<ncols; j++) {
2266         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2267         if (col > j && j < cstart) {
2268           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2269           break;
2270         } else if (col > j + n && j >= cstart) {
2271           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2272           break;
2273         }
2274       }
2275       if (j == ncols && ncols < A->cmap->N - n) {
2276         /* a hole is outside compressed Bcols */
2277         if (ncols == 0) {
2278           if (cstart) {
2279             offdiagIdx[r] = 0;
2280           } else offdiagIdx[r] = cend;
2281         } else { /* ncols > 0 */
2282           offdiagIdx[r] = cmap[ncols-1] + 1;
2283           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2284         }
2285       }
2286     }
2287 
2288     for (j=0; j<ncols; j++) {
2289       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2290       ba++; bj++;
2291     }
2292   }
2293 
2294   PetscCall(VecGetArrayWrite(v, &a));
2295   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2296   for (r = 0; r < m; ++r) {
2297     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2298       a[r]   = diagA[r];
2299       if (idx) idx[r] = cstart + diagIdx[r];
2300     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2301       a[r] = diagA[r];
2302       if (idx) {
2303         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2304           idx[r] = cstart + diagIdx[r];
2305         } else idx[r] = offdiagIdx[r];
2306       }
2307     } else {
2308       a[r]   = offdiagA[r];
2309       if (idx) idx[r] = offdiagIdx[r];
2310     }
2311   }
2312   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2313   PetscCall(VecRestoreArrayWrite(v, &a));
2314   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2315   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2316   PetscCall(VecDestroy(&diagV));
2317   PetscCall(VecDestroy(&offdiagV));
2318   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2323 {
2324   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2325   PetscInt          m = A->rmap->n,n = A->cmap->n;
2326   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2327   PetscInt          *cmap  = mat->garray;
2328   PetscInt          *diagIdx, *offdiagIdx;
2329   Vec               diagV, offdiagV;
2330   PetscScalar       *a, *diagA, *offdiagA;
2331   const PetscScalar *ba,*bav;
2332   PetscInt          r,j,col,ncols,*bi,*bj;
2333   Mat               B = mat->B;
2334   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2335 
2336   PetscFunctionBegin;
2337   /* When one process holds the entire matrix A and the other processes have no entries */
2338   if (A->cmap->N == n) {
2339     PetscCall(VecGetArrayWrite(v,&diagA));
2340     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2341     PetscCall(MatGetRowMin(mat->A,diagV,idx));
2342     PetscCall(VecDestroy(&diagV));
2343     PetscCall(VecRestoreArrayWrite(v,&diagA));
2344     PetscFunctionReturn(0);
2345   } else if (n == 0) {
2346     if (m) {
2347       PetscCall(VecGetArrayWrite(v,&a));
2348       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2349       PetscCall(VecRestoreArrayWrite(v,&a));
2350     }
2351     PetscFunctionReturn(0);
2352   }
2353 
2354   PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
2355   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2356   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2357   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2358 
2359   /* Get offdiagIdx[] for implicit 0.0 */
2360   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2361   ba   = bav;
2362   bi   = b->i;
2363   bj   = b->j;
2364   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2365   for (r = 0; r < m; r++) {
2366     ncols = bi[r+1] - bi[r];
2367     if (ncols == A->cmap->N - n) { /* Brow is dense */
2368       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2369     } else { /* Brow is sparse, so we already know the minimum is 0.0 or lower */
2370       offdiagA[r] = 0.0;
2371 
2372       /* Find first hole in the cmap */
2373       for (j=0; j<ncols; j++) {
2374         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2375         if (col > j && j < cstart) {
2376           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2377           break;
2378         } else if (col > j + n && j >= cstart) {
2379           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2380           break;
2381         }
2382       }
2383       if (j == ncols && ncols < A->cmap->N - n) {
2384         /* a hole is outside compressed Bcols */
2385         if (ncols == 0) {
2386           if (cstart) {
2387             offdiagIdx[r] = 0;
2388           } else offdiagIdx[r] = cend;
2389         } else { /* ncols > 0 */
2390           offdiagIdx[r] = cmap[ncols-1] + 1;
2391           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2392         }
2393       }
2394     }
2395 
2396     for (j=0; j<ncols; j++) {
2397       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2398       ba++; bj++;
2399     }
2400   }
2401 
2402   PetscCall(VecGetArrayWrite(v, &a));
2403   PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
2404   for (r = 0; r < m; ++r) {
2405     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2406       a[r]   = diagA[r];
2407       if (idx) idx[r] = cstart + diagIdx[r];
2408     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2409       a[r] = diagA[r];
2410       if (idx) {
2411         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2412           idx[r] = cstart + diagIdx[r];
2413         } else idx[r] = offdiagIdx[r];
2414       }
2415     } else {
2416       a[r]   = offdiagA[r];
2417       if (idx) idx[r] = offdiagIdx[r];
2418     }
2419   }
2420   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2421   PetscCall(VecRestoreArrayWrite(v, &a));
2422   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
2423   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2424   PetscCall(VecDestroy(&diagV));
2425   PetscCall(VecDestroy(&offdiagV));
2426   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2433   PetscInt          m = A->rmap->n,n = A->cmap->n;
2434   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2435   PetscInt          *cmap  = mat->garray;
2436   PetscInt          *diagIdx, *offdiagIdx;
2437   Vec               diagV, offdiagV;
2438   PetscScalar       *a, *diagA, *offdiagA;
2439   const PetscScalar *ba,*bav;
2440   PetscInt          r,j,col,ncols,*bi,*bj;
2441   Mat               B = mat->B;
2442   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2443 
2444   PetscFunctionBegin;
2445   /* When one process holds the entire matrix A and the other processes have no entries */
2446   if (A->cmap->N == n) {
2447     PetscCall(VecGetArrayWrite(v,&diagA));
2448     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
2449     PetscCall(MatGetRowMax(mat->A,diagV,idx));
2450     PetscCall(VecDestroy(&diagV));
2451     PetscCall(VecRestoreArrayWrite(v,&diagA));
2452     PetscFunctionReturn(0);
2453   } else if (n == 0) {
2454     if (m) {
2455       PetscCall(VecGetArrayWrite(v,&a));
2456       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2457       PetscCall(VecRestoreArrayWrite(v,&a));
2458     }
2459     PetscFunctionReturn(0);
2460   }
2461 
2462   PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
2463   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2464   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2465   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2466 
2467   /* Get offdiagIdx[] for implicit 0.0 */
2468   PetscCall(MatSeqAIJGetArrayRead(B,&bav));
2469   ba   = bav;
2470   bi   = b->i;
2471   bj   = b->j;
2472   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2473   for (r = 0; r < m; r++) {
2474     ncols = bi[r+1] - bi[r];
2475     if (ncols == A->cmap->N - n) { /* Brow is dense */
2476       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2477     } else { /* Brow is sparse, so we already know the maximum is 0.0 or higher */
2478       offdiagA[r] = 0.0;
2479 
2480       /* Find first hole in the cmap */
2481       for (j=0; j<ncols; j++) {
2482         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2483         if (col > j && j < cstart) {
2484           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2485           break;
2486         } else if (col > j + n && j >= cstart) {
2487           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2488           break;
2489         }
2490       }
2491       if (j == ncols && ncols < A->cmap->N - n) {
2492         /* a hole is outside compressed Bcols */
2493         if (ncols == 0) {
2494           if (cstart) {
2495             offdiagIdx[r] = 0;
2496           } else offdiagIdx[r] = cend;
2497         } else { /* ncols > 0 */
2498           offdiagIdx[r] = cmap[ncols-1] + 1;
2499           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2500         }
2501       }
2502     }
2503 
2504     for (j=0; j<ncols; j++) {
2505       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2506       ba++; bj++;
2507     }
2508   }
2509 
2510   PetscCall(VecGetArrayWrite(v,    &a));
2511   PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
2512   for (r = 0; r < m; ++r) {
2513     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2514       a[r] = diagA[r];
2515       if (idx) idx[r] = cstart + diagIdx[r];
2516     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2517       a[r] = diagA[r];
2518       if (idx) {
2519         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2520           idx[r] = cstart + diagIdx[r];
2521         } else idx[r] = offdiagIdx[r];
2522       }
2523     } else {
2524       a[r] = offdiagA[r];
2525       if (idx) idx[r] = offdiagIdx[r];
2526     }
2527   }
2528   PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
2529   PetscCall(VecRestoreArrayWrite(v,       &a));
2530   PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
2531   PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
2532   PetscCall(VecDestroy(&diagV));
2533   PetscCall(VecDestroy(&offdiagV));
2534   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2539 {
2540   Mat            *dummy;
2541 
2542   PetscFunctionBegin;
2543   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2544   *newmat = *dummy;
2545   PetscCall(PetscFree(dummy));
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2550 {
2551   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2552 
2553   PetscFunctionBegin;
2554   PetscCall(MatInvertBlockDiagonal(a->A,values));
2555   A->factorerrortype = a->A->factorerrortype;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2560 {
2561   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2562 
2563   PetscFunctionBegin;
2564   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2565   PetscCall(MatSetRandom(aij->A,rctx));
2566   if (x->assembled) {
2567     PetscCall(MatSetRandom(aij->B,rctx));
2568   } else {
2569     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2570   }
2571   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2572   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2577 {
2578   PetscFunctionBegin;
2579   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2580   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*@
2585    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2586 
2587    Not collective
2588 
2589    Input Parameter:
2590 .    A - the matrix
2591 
2592    Output Parameter:
2593 .    nz - the number of nonzeros
2594 
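   Example Usage (a minimal sketch; assumes A is an assembled MATMPIAIJ matrix):
.vb
   PetscCount nz;
   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
.ve
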
2595  Level: advanced
2596 
2597 @*/
2598 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz)
2599 {
2600   Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data;
2601   Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data;
2602 
2603   PetscFunctionBegin;
2604   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 /*@
2609    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2610 
2611    Collective on Mat
2612 
2613    Input Parameters:
2614 +    A - the matrix
2615 -    sc - PETSC_TRUE to use the scalable algorithm (the default is not to use it)
2616 
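   Options Database Key:
.  -mat_increase_overlap_scalable - use the scalable algorithm when computing the overlap in MatIncreaseOverlap()

   Example Usage (a sketch; assumes A is a MATMPIAIJ matrix):
.vb
   PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
.ve
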
2617  Level: advanced
2618 
2619 @*/
2620 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2621 {
2622   PetscFunctionBegin;
2623   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2628 {
2629   PetscBool            sc = PETSC_FALSE,flg;
2630 
2631   PetscFunctionBegin;
2632   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2633   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2634   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2635   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2636   PetscOptionsHeadEnd();
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2641 {
2642   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2643   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2644 
2645   PetscFunctionBegin;
2646   if (!Y->preallocated) {
2647     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2648   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A; we just need an estimated preallocation. */
2649     PetscInt nonew = aij->nonew;
2650     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2651     aij->nonew = nonew;
2652   }
2653   PetscCall(MatShift_Basic(Y,a));
2654   PetscFunctionReturn(0);
2655 }
2656 
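/* Example (editor's sketch, not part of the PETSc source): MatShift_MPIAIJ() above backs the public
   MatShift() call, which adds a multiple of the identity to the matrix; Y is assumed to be a
   preallocated (or assembled) MATMPIAIJ matrix.

     PetscCall(MatShift(Y,2.0));
*/
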
2657 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2658 {
2659   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2660 
2661   PetscFunctionBegin;
2662   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2663   PetscCall(MatMissingDiagonal(a->A,missing,d));
2664   if (d) {
2665     PetscInt rstart;
2666     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2667     *d += rstart;
2668 
2669   }
2670   PetscFunctionReturn(0);
2671 }
2672 
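/* Example (editor's sketch, not part of the PETSc source): checking a square MATMPIAIJ matrix A for
   rows whose diagonal entry is absent from the nonzero structure; as coded above, d is returned as a
   global row index (the local result shifted by the start of the ownership range).

     PetscBool missing;
     PetscInt  d;
     PetscCall(MatMissingDiagonal(A,&missing,&d));
     if (missing) PetscCall(PetscPrintf(PETSC_COMM_SELF,"first missing diagonal in row %" PetscInt_FMT "\n",d));
*/
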
2673 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2674 {
2675   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2676 
2677   PetscFunctionBegin;
2678   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 /* -------------------------------------------------------------------*/
2683 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2684                                        MatGetRow_MPIAIJ,
2685                                        MatRestoreRow_MPIAIJ,
2686                                        MatMult_MPIAIJ,
2687                                 /* 4*/ MatMultAdd_MPIAIJ,
2688                                        MatMultTranspose_MPIAIJ,
2689                                        MatMultTransposeAdd_MPIAIJ,
2690                                        NULL,
2691                                        NULL,
2692                                        NULL,
2693                                 /*10*/ NULL,
2694                                        NULL,
2695                                        NULL,
2696                                        MatSOR_MPIAIJ,
2697                                        MatTranspose_MPIAIJ,
2698                                 /*15*/ MatGetInfo_MPIAIJ,
2699                                        MatEqual_MPIAIJ,
2700                                        MatGetDiagonal_MPIAIJ,
2701                                        MatDiagonalScale_MPIAIJ,
2702                                        MatNorm_MPIAIJ,
2703                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2704                                        MatAssemblyEnd_MPIAIJ,
2705                                        MatSetOption_MPIAIJ,
2706                                        MatZeroEntries_MPIAIJ,
2707                                 /*24*/ MatZeroRows_MPIAIJ,
2708                                        NULL,
2709                                        NULL,
2710                                        NULL,
2711                                        NULL,
2712                                 /*29*/ MatSetUp_MPIAIJ,
2713                                        NULL,
2714                                        NULL,
2715                                        MatGetDiagonalBlock_MPIAIJ,
2716                                        NULL,
2717                                 /*34*/ MatDuplicate_MPIAIJ,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                 /*39*/ MatAXPY_MPIAIJ,
2723                                        MatCreateSubMatrices_MPIAIJ,
2724                                        MatIncreaseOverlap_MPIAIJ,
2725                                        MatGetValues_MPIAIJ,
2726                                        MatCopy_MPIAIJ,
2727                                 /*44*/ MatGetRowMax_MPIAIJ,
2728                                        MatScale_MPIAIJ,
2729                                        MatShift_MPIAIJ,
2730                                        MatDiagonalSet_MPIAIJ,
2731                                        MatZeroRowsColumns_MPIAIJ,
2732                                 /*49*/ MatSetRandom_MPIAIJ,
2733                                        MatGetRowIJ_MPIAIJ,
2734                                        MatRestoreRowIJ_MPIAIJ,
2735                                        NULL,
2736                                        NULL,
2737                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2738                                        NULL,
2739                                        MatSetUnfactored_MPIAIJ,
2740                                        MatPermute_MPIAIJ,
2741                                        NULL,
2742                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2743                                        MatDestroy_MPIAIJ,
2744                                        MatView_MPIAIJ,
2745                                        NULL,
2746                                        NULL,
2747                                 /*64*/ NULL,
2748                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2753                                        MatGetRowMinAbs_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        NULL,
2758                                 /*75*/ MatFDColoringApply_AIJ,
2759                                        MatSetFromOptions_MPIAIJ,
2760                                        NULL,
2761                                        NULL,
2762                                        MatFindZeroDiagonals_MPIAIJ,
2763                                 /*80*/ NULL,
2764                                        NULL,
2765                                        NULL,
2766                                 /*83*/ MatLoad_MPIAIJ,
2767                                        MatIsSymmetric_MPIAIJ,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                 /*89*/ NULL,
2773                                        NULL,
2774                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2778                                        NULL,
2779                                        NULL,
2780                                        NULL,
2781                                        MatBindToCPU_MPIAIJ,
2782                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        MatConjugate_MPIAIJ,
2786                                        NULL,
2787                                 /*104*/MatSetValuesRow_MPIAIJ,
2788                                        MatRealPart_MPIAIJ,
2789                                        MatImaginaryPart_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                 /*109*/NULL,
2793                                        NULL,
2794                                        MatGetRowMin_MPIAIJ,
2795                                        NULL,
2796                                        MatMissingDiagonal_MPIAIJ,
2797                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2798                                        NULL,
2799                                        MatGetGhosts_MPIAIJ,
2800                                        NULL,
2801                                        NULL,
2802                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        MatGetMultiProcBlock_MPIAIJ,
2807                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2808                                        MatGetColumnReductions_MPIAIJ,
2809                                        MatInvertBlockDiagonal_MPIAIJ,
2810                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2811                                        MatCreateSubMatricesMPI_MPIAIJ,
2812                                 /*129*/NULL,
2813                                        NULL,
2814                                        NULL,
2815                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                 /*134*/NULL,
2818                                        NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                 /*139*/MatSetBlockSizes_MPIAIJ,
2823                                        NULL,
2824                                        NULL,
2825                                        MatFDColoringSetUp_MPIXAIJ,
2826                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2827                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2828                                 /*145*/NULL,
2829                                        NULL,
2830                                        NULL,
2831                                        MatCreateGraph_Simple_AIJ,
2832                                        MatFilter_AIJ
2833 };
2834 
2835 /* ----------------------------------------------------------------------------------------*/
2836 
2837 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2838 {
2839   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2840 
2841   PetscFunctionBegin;
2842   PetscCall(MatStoreValues(aij->A));
2843   PetscCall(MatStoreValues(aij->B));
2844   PetscFunctionReturn(0);
2845 }
2846 
2847 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2848 {
2849   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2850 
2851   PetscFunctionBegin;
2852   PetscCall(MatRetrieveValues(aij->A));
2853   PetscCall(MatRetrieveValues(aij->B));
2854   PetscFunctionReturn(0);
2855 }
2856 
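/* Example (editor's sketch, not part of the PETSc source): the usual store/retrieve pattern for a
   matrix mat whose nonzero structure stays fixed between assemblies; the values are stashed once and
   restored before each re-assembly. Calling MatSetOption() with MAT_NEW_NONZERO_LOCATIONS set to
   PETSC_FALSE before MatStoreValues() is assumed to be required, as in the sequential implementations.

     PetscCall(MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE));
     PetscCall(MatStoreValues(mat));
     ... modify the matrix values, for example add a diagonal shift ...
     PetscCall(MatRetrieveValues(mat));
*/
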
2857 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2858 {
2859   Mat_MPIAIJ     *b;
2860   PetscMPIInt    size;
2861 
2862   PetscFunctionBegin;
2863   PetscCall(PetscLayoutSetUp(B->rmap));
2864   PetscCall(PetscLayoutSetUp(B->cmap));
2865   b = (Mat_MPIAIJ*)B->data;
2866 
2867 #if defined(PETSC_USE_CTABLE)
2868   PetscCall(PetscTableDestroy(&b->colmap));
2869 #else
2870   PetscCall(PetscFree(b->colmap));
2871 #endif
2872   PetscCall(PetscFree(b->garray));
2873   PetscCall(VecDestroy(&b->lvec));
2874   PetscCall(VecScatterDestroy(&b->Mvctx));
2875 
2876   /* Because B may have been resized we simply destroy it and create a new one each time */
2877   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2878   PetscCall(MatDestroy(&b->B));
2879   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2880   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2881   PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
2882   PetscCall(MatSetType(b->B,MATSEQAIJ));
2883   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2884 
2885   if (!B->preallocated) {
2886     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2887     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2888     PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
2889     PetscCall(MatSetType(b->A,MATSEQAIJ));
2890     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2891   }
2892 
2893   PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
2894   PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
2895   B->preallocated  = PETSC_TRUE;
2896   B->was_assembled = PETSC_FALSE;
2897   B->assembled     = PETSC_FALSE;
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2902 {
2903   Mat_MPIAIJ     *b;
2904 
2905   PetscFunctionBegin;
2906   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2907   PetscCall(PetscLayoutSetUp(B->rmap));
2908   PetscCall(PetscLayoutSetUp(B->cmap));
2909   b = (Mat_MPIAIJ*)B->data;
2910 
2911 #if defined(PETSC_USE_CTABLE)
2912   PetscCall(PetscTableDestroy(&b->colmap));
2913 #else
2914   PetscCall(PetscFree(b->colmap));
2915 #endif
2916   PetscCall(PetscFree(b->garray));
2917   PetscCall(VecDestroy(&b->lvec));
2918   PetscCall(VecScatterDestroy(&b->Mvctx));
2919 
2920   PetscCall(MatResetPreallocation(b->A));
2921   PetscCall(MatResetPreallocation(b->B));
2922   B->preallocated  = PETSC_TRUE;
2923   B->was_assembled = PETSC_FALSE;
2924   B->assembled = PETSC_FALSE;
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2929 {
2930   Mat            mat;
2931   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2932 
2933   PetscFunctionBegin;
2934   *newmat = NULL;
2935   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2936   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2937   PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
2938   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2939   a       = (Mat_MPIAIJ*)mat->data;
2940 
2941   mat->factortype   = matin->factortype;
2942   mat->assembled    = matin->assembled;
2943   mat->insertmode   = NOT_SET_VALUES;
2944   mat->preallocated = matin->preallocated;
2945 
2946   a->size         = oldmat->size;
2947   a->rank         = oldmat->rank;
2948   a->donotstash   = oldmat->donotstash;
2949   a->roworiented  = oldmat->roworiented;
2950   a->rowindices   = NULL;
2951   a->rowvalues    = NULL;
2952   a->getrowactive = PETSC_FALSE;
2953 
2954   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2955   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2956 
2957   if (oldmat->colmap) {
2958 #if defined(PETSC_USE_CTABLE)
2959     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2960 #else
2961     PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
2962     PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
2963     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
2964 #endif
2965   } else a->colmap = NULL;
2966   if (oldmat->garray) {
2967     PetscInt len;
2968     len  = oldmat->B->cmap->n;
2969     PetscCall(PetscMalloc1(len+1,&a->garray));
2970     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2971     if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2972   } else a->garray = NULL;
2973 
2974   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2975      in fact, MatDuplicate only requires the matrix to be preallocated.
2976      This may happen, for example, inside DMCreateMatrix_Shell */
2977   if (oldmat->lvec) {
2978     PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2979     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2980   }
2981   if (oldmat->Mvctx) {
2982     PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2983     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2984   }
2985   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2986   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2987   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2988   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2989   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2990   *newmat = mat;
2991   PetscFunctionReturn(0);
2992 }
2993 
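/* Example (editor's sketch, not part of the PETSc source): duplicating a preallocated or assembled
   MATMPIAIJ matrix A, with or without copying its numerical values; the duplicate must be destroyed
   by the caller. Pass MAT_DO_NOT_COPY_VALUES to copy only the nonzero structure.

     Mat B;
     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
     PetscCall(MatDestroy(&B));
*/
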
2994 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2995 {
2996   PetscBool      isbinary, ishdf5;
2997 
2998   PetscFunctionBegin;
2999   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3000   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3001   /* force binary viewer to load .info file if it has not yet done so */
3002   PetscCall(PetscViewerSetUp(viewer));
3003   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3004   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
3005   if (isbinary) {
3006     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
3007   } else if (ishdf5) {
3008 #if defined(PETSC_HAVE_HDF5)
3009     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
3010 #else
3011     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3012 #endif
3013   } else {
3014     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3015   }
3016   PetscFunctionReturn(0);
3017 }
3018 
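/* Example (editor's sketch, not part of the PETSc source): loading a parallel AIJ matrix from a
   PETSc binary file; the file name "matrix.dat" is a placeholder chosen for the illustration.

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetType(A,MATAIJ));
     PetscCall(MatLoad(A,viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/
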
3019 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3020 {
3021   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3022   PetscInt       *rowidxs,*colidxs;
3023   PetscScalar    *matvals;
3024 
3025   PetscFunctionBegin;
3026   PetscCall(PetscViewerSetUp(viewer));
3027 
3028   /* read in matrix header */
3029   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3030   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3031   M  = header[1]; N = header[2]; nz = header[3];
3032   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3033   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3034   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3035 
3036   /* set block sizes from the viewer's .info file */
3037   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3038   /* set global sizes if not set already */
3039   if (mat->rmap->N < 0) mat->rmap->N = M;
3040   if (mat->cmap->N < 0) mat->cmap->N = N;
3041   PetscCall(PetscLayoutSetUp(mat->rmap));
3042   PetscCall(PetscLayoutSetUp(mat->cmap));
3043 
3044   /* check if the matrix sizes are correct */
3045   PetscCall(MatGetSize(mat,&rows,&cols));
3046   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3047 
3048   /* read in row lengths and build row indices */
3049   PetscCall(MatGetLocalSize(mat,&m,NULL));
3050   PetscCall(PetscMalloc1(m+1,&rowidxs));
3051   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3052   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3053   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3054   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3055   /* read in column indices and matrix values */
3056   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3057   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3058   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3059   /* store matrix indices and values */
3060   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3061   PetscCall(PetscFree(rowidxs));
3062   PetscCall(PetscFree2(colidxs,matvals));
3063   PetscFunctionReturn(0);
3064 }
3065 
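/* Editor's note (a sketch inferred from the reads performed above, not authoritative documentation):
   MatLoad_MPIAIJ_Binary() expects the standard PETSc binary matrix layout, i.e. a four-entry integer
   header (MAT_FILE_CLASSID, global rows, global columns, total nonzeros) followed by the per-row
   nonzero counts, the column indices, and the scalar values. Such a file can be produced by viewing
   an assembled matrix with a binary viewer, for example:

     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer));
     PetscCall(MatView(A,viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/
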
3066 /* Not scalable because of ISAllGather() unless getting all columns. */
3067 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3068 {
3069   IS             iscol_local;
3070   PetscBool      isstride;
3071   PetscMPIInt    lisstride=0,gisstride;
3072 
3073   PetscFunctionBegin;
3074   /* check if we are grabbing all columns*/
3075   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3076 
3077   if (isstride) {
3078     PetscInt  start,len,mstart,mlen;
3079     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3080     PetscCall(ISGetLocalSize(iscol,&len));
3081     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3082     if (mstart == start && mlen-mstart == len) lisstride = 1;
3083   }
3084 
3085   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3086   if (gisstride) {
3087     PetscInt N;
3088     PetscCall(MatGetSize(mat,NULL,&N));
3089     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3090     PetscCall(ISSetIdentity(iscol_local));
3091     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3092   } else {
3093     PetscInt cbs;
3094     PetscCall(ISGetBlockSize(iscol,&cbs));
3095     PetscCall(ISAllGather(iscol,&iscol_local));
3096     PetscCall(ISSetBlockSize(iscol_local,cbs));
3097   }
3098 
3099   *isseq = iscol_local;
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 /*
3104  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid the ISAllGather() and the globally sized
3105  iscol_local that MatCreateSubMatrix_MPIAIJ_nonscalable() relies on
3106 
3107  Input Parameters:
3108    mat - matrix
3109    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3110            i.e., mat->rstart <= isrow[i] < mat->rend
3111    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3112            i.e., mat->cstart <= iscol[i] < mat->cend
3113  Output Parameters:
3114    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3115    iscol_o - sequential column index set for retrieving mat->B
3116    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3117  */
3118 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3119 {
3120   Vec            x,cmap;
3121   const PetscInt *is_idx;
3122   PetscScalar    *xarray,*cmaparray;
3123   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3124   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3125   Mat            B=a->B;
3126   Vec            lvec=a->lvec,lcmap;
3127   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3128   MPI_Comm       comm;
3129   VecScatter     Mvctx=a->Mvctx;
3130 
3131   PetscFunctionBegin;
3132   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3133   PetscCall(ISGetLocalSize(iscol,&ncols));
3134 
3135   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3136   PetscCall(MatCreateVecs(mat,&x,NULL));
3137   PetscCall(VecSet(x,-1.0));
3138   PetscCall(VecDuplicate(x,&cmap));
3139   PetscCall(VecSet(cmap,-1.0));
3140 
3141   /* Get start indices */
3142   PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
3143   isstart -= ncols;
3144   PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));
3145 
3146   PetscCall(ISGetIndices(iscol,&is_idx));
3147   PetscCall(VecGetArray(x,&xarray));
3148   PetscCall(VecGetArray(cmap,&cmaparray));
3149   PetscCall(PetscMalloc1(ncols,&idx));
3150   for (i=0; i<ncols; i++) {
3151     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3152     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3153     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3154   }
3155   PetscCall(VecRestoreArray(x,&xarray));
3156   PetscCall(VecRestoreArray(cmap,&cmaparray));
3157   PetscCall(ISRestoreIndices(iscol,&is_idx));
3158 
3159   /* Get iscol_d */
3160   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
3161   PetscCall(ISGetBlockSize(iscol,&i));
3162   PetscCall(ISSetBlockSize(*iscol_d,i));
3163 
3164   /* Get isrow_d */
3165   PetscCall(ISGetLocalSize(isrow,&m));
3166   rstart = mat->rmap->rstart;
3167   PetscCall(PetscMalloc1(m,&idx));
3168   PetscCall(ISGetIndices(isrow,&is_idx));
3169   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3170   PetscCall(ISRestoreIndices(isrow,&is_idx));
3171 
3172   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
3173   PetscCall(ISGetBlockSize(isrow,&i));
3174   PetscCall(ISSetBlockSize(*isrow_d,i));
3175 
3176   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3177   PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3178   PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
3179 
3180   PetscCall(VecDuplicate(lvec,&lcmap));
3181 
3182   PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3183   PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
3184 
3185   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3186   /* off-process column indices */
3187   count = 0;
3188   PetscCall(PetscMalloc1(Bn,&idx));
3189   PetscCall(PetscMalloc1(Bn,&cmap1));
3190 
3191   PetscCall(VecGetArray(lvec,&xarray));
3192   PetscCall(VecGetArray(lcmap,&cmaparray));
3193   for (i=0; i<Bn; i++) {
3194     if (PetscRealPart(xarray[i]) > -1.0) {
3195       idx[count]     = i;                   /* local column index in off-diagonal part B */
3196       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3197       count++;
3198     }
3199   }
3200   PetscCall(VecRestoreArray(lvec,&xarray));
3201   PetscCall(VecRestoreArray(lcmap,&cmaparray));
3202 
3203   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
3204   /* cannot ensure iscol_o has same blocksize as iscol! */
3205 
3206   PetscCall(PetscFree(idx));
3207   *garray = cmap1;
3208 
3209   PetscCall(VecDestroy(&x));
3210   PetscCall(VecDestroy(&cmap));
3211   PetscCall(VecDestroy(&lcmap));
3212   PetscFunctionReturn(0);
3213 }
3214 
3215 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3216 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3217 {
3218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3219   Mat            M = NULL;
3220   MPI_Comm       comm;
3221   IS             iscol_d,isrow_d,iscol_o;
3222   Mat            Asub = NULL,Bsub = NULL;
3223   PetscInt       n;
3224 
3225   PetscFunctionBegin;
3226   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3227 
3228   if (call == MAT_REUSE_MATRIX) {
3229     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3230     PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
3231     PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3232 
3233     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
3234     PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3235 
3236     PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
3237     PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3238 
3239     /* Update diagonal and off-diagonal portions of submat */
3240     asub = (Mat_MPIAIJ*)(*submat)->data;
3241     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
3242     PetscCall(ISGetLocalSize(iscol_o,&n));
3243     if (n) {
3244       PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
3245     }
3246     PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
3247     PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));
3248 
3249   } else { /* call == MAT_INITIAL_MATRIX) */
3250     const PetscInt *garray;
3251     PetscInt        BsubN;
3252 
3253     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3254     PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));
3255 
3256     /* Create local submatrices Asub and Bsub */
3257     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
3258     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));
3259 
3260     /* Create submatrix M */
3261     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));
3262 
3263     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3264     asub = (Mat_MPIAIJ*)M->data;
3265 
3266     PetscCall(ISGetLocalSize(iscol_o,&BsubN));
3267     n = asub->B->cmap->N;
3268     if (BsubN > n) {
3269       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3270       const PetscInt *idx;
3271       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3272       PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));
3273 
3274       PetscCall(PetscMalloc1(n,&idx_new));
3275       j = 0;
3276       PetscCall(ISGetIndices(iscol_o,&idx));
3277       for (i=0; i<n; i++) {
3278         if (j >= BsubN) break;
3279         while (subgarray[i] > garray[j]) j++;
3280 
3281         if (subgarray[i] == garray[j]) {
3282           idx_new[i] = idx[j++];
3283         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be less than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3284       }
3285       PetscCall(ISRestoreIndices(iscol_o,&idx));
3286 
3287       PetscCall(ISDestroy(&iscol_o));
3288       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));
3289 
3290     } else if (BsubN < n) {
3291       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3292     }
3293 
3294     PetscCall(PetscFree(garray));
3295     *submat = M;
3296 
3297     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3298     PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
3299     PetscCall(ISDestroy(&isrow_d));
3300 
3301     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
3302     PetscCall(ISDestroy(&iscol_d));
3303 
3304     PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
3305     PetscCall(ISDestroy(&iscol_o));
3306   }
3307   PetscFunctionReturn(0);
3308 }
3309 
3310 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3311 {
3312   IS             iscol_local=NULL,isrow_d;
3313   PetscInt       csize;
3314   PetscInt       n,i,j,start,end;
3315   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3316   MPI_Comm       comm;
3317 
3318   PetscFunctionBegin;
3319   /* If isrow has same processor distribution as mat,
3320      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3321   if (call == MAT_REUSE_MATRIX) {
3322     PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
3323     if (isrow_d) {
3324       sameRowDist  = PETSC_TRUE;
3325       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3326     } else {
3327       PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
3328       if (iscol_local) {
3329         sameRowDist  = PETSC_TRUE;
3330         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3331       }
3332     }
3333   } else {
3334     /* Check if isrow has same processor distribution as mat */
3335     sameDist[0] = PETSC_FALSE;
3336     PetscCall(ISGetLocalSize(isrow,&n));
3337     if (!n) {
3338       sameDist[0] = PETSC_TRUE;
3339     } else {
3340       PetscCall(ISGetMinMax(isrow,&i,&j));
3341       PetscCall(MatGetOwnershipRange(mat,&start,&end));
3342       if (i >= start && j < end) {
3343         sameDist[0] = PETSC_TRUE;
3344       }
3345     }
3346 
3347     /* Check if iscol has same processor distribution as mat */
3348     sameDist[1] = PETSC_FALSE;
3349     PetscCall(ISGetLocalSize(iscol,&n));
3350     if (!n) {
3351       sameDist[1] = PETSC_TRUE;
3352     } else {
3353       PetscCall(ISGetMinMax(iscol,&i,&j));
3354       PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
3355       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3356     }
3357 
3358     PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3359     PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
3360     sameRowDist = tsameDist[0];
3361   }
3362 
3363   if (sameRowDist) {
3364     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3365       /* isrow and iscol have same processor distribution as mat */
3366       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
3367       PetscFunctionReturn(0);
3368     } else { /* sameRowDist */
3369       /* isrow has same processor distribution as mat */
3370       if (call == MAT_INITIAL_MATRIX) {
3371         PetscBool sorted;
3372         PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3373         PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
3374         PetscCall(ISGetSize(iscol,&i));
3375         PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3376 
3377         PetscCall(ISSorted(iscol_local,&sorted));
3378         if (sorted) {
3379           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3380           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
3381           PetscFunctionReturn(0);
3382         }
3383       } else { /* call == MAT_REUSE_MATRIX */
3384         IS iscol_sub;
3385         PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3386         if (iscol_sub) {
3387           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
3388           PetscFunctionReturn(0);
3389         }
3390       }
3391     }
3392   }
3393 
3394   /* General case: iscol -> iscol_local which has global size of iscol */
3395   if (call == MAT_REUSE_MATRIX) {
3396     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
3397     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3398   } else {
3399     if (!iscol_local) {
3400       PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
3401     }
3402   }
3403 
3404   PetscCall(ISGetLocalSize(iscol,&csize));
3405   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));
3406 
3407   if (call == MAT_INITIAL_MATRIX) {
3408     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
3409     PetscCall(ISDestroy(&iscol_local));
3410   }
3411   PetscFunctionReturn(0);
3412 }
3413 
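/* Example (editor's sketch, not part of the PETSc source): extracting a parallel submatrix through the
   public MatCreateSubMatrix() interface, which dispatches to MatCreateSubMatrix_MPIAIJ() above. The
   index sets isrow and iscol holding the wanted global rows and columns are assumptions of the
   illustration; calling again with MAT_REUSE_MATRIX and the same index sets reuses the objects composed
   on the submatrix above ("isrow_d", "SubIScol", "ISAllGather", ...).

     Mat sub;
     PetscCall(MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub));
     ... change values in A, then refresh the same submatrix in place ...
     PetscCall(MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub));
     PetscCall(MatDestroy(&sub));
*/
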
3414 /*@C
3415      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3416          and "off-diagonal" parts of the matrix in CSR format.
3417 
3418    Collective
3419 
3420    Input Parameters:
3421 +  comm - MPI communicator
3422 .  A - "diagonal" portion of matrix
3423 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3424 -  garray - global index of B columns
3425 
3426    Output Parameter:
3427 .   mat - the matrix, with input A as its local diagonal matrix

3428    Level: advanced
3429 
3430    Notes:
3431        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3432        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3433 
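   Example usage (an editorial sketch, not taken from the PETSc documentation; the garray contents are
   hypothetical, and A and B are assumed to be assembled MATSEQAIJ matrices in which the column indices
   of B are local, running from 0 to one less than the number of columns of B):

$     PetscInt garray[2] = {0,7};
$     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&mat));

   after which A and B belong to mat and must not be used or destroyed by the caller.
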
3434 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3435 @*/
3436 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3437 {
3438   Mat_MPIAIJ        *maij;
3439   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3440   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3441   const PetscScalar *oa;
3442   Mat               Bnew;
3443   PetscInt          m,n,N;
3444   MatType           mpi_mat_type;
3445 
3446   PetscFunctionBegin;
3447   PetscCall(MatCreate(comm,mat));
3448   PetscCall(MatGetSize(A,&m,&n));
3449   PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3450   PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3451   /* the check below is removed because, when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3452   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3453 
3454   /* Get global columns of mat */
3455   PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));
3456 
3457   PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
3458   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3459   PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
3460   PetscCall(MatSetType(*mat,mpi_mat_type));
3461 
3462   PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
3463   maij = (Mat_MPIAIJ*)(*mat)->data;
3464 
3465   (*mat)->preallocated = PETSC_TRUE;
3466 
3467   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3468   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3469 
3470   /* Set A as diagonal portion of *mat */
3471   maij->A = A;
3472 
3473   nz = oi[m];
3474   for (i=0; i<nz; i++) {
3475     col   = oj[i];
3476     oj[i] = garray[col];
3477   }
3478 
3479   /* Set Bnew as off-diagonal portion of *mat */
3480   PetscCall(MatSeqAIJGetArrayRead(B,&oa));
3481   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
3482   PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
3483   bnew        = (Mat_SeqAIJ*)Bnew->data;
3484   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3485   maij->B     = Bnew;
3486 
3487   PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3488 
3489   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3490   b->free_a       = PETSC_FALSE;
3491   b->free_ij      = PETSC_FALSE;
3492   PetscCall(MatDestroy(&B));
3493 
3494   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3495   bnew->free_a       = PETSC_TRUE;
3496   bnew->free_ij      = PETSC_TRUE;
3497 
3498   /* condense columns of maij->B */
3499   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3500   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3501   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3502   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
3503   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3508 
3509 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3510 {
3511   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3512   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3513   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3514   Mat            M,Msub,B=a->B;
3515   MatScalar      *aa;
3516   Mat_SeqAIJ     *aij;
3517   PetscInt       *garray = a->garray,*colsub,Ncols;
3518   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3519   IS             iscol_sub,iscmap;
3520   const PetscInt *is_idx,*cmap;
3521   PetscBool      allcolumns=PETSC_FALSE;
3522   MPI_Comm       comm;
3523 
3524   PetscFunctionBegin;
3525   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3526   if (call == MAT_REUSE_MATRIX) {
3527     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3528     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3529     PetscCall(ISGetLocalSize(iscol_sub,&count));
3530 
3531     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3532     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3533 
3534     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3535     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3536 
3537     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3538 
3539   } else { /* call == MAT_INITIAL_MATRIX) */
3540     PetscBool flg;
3541 
3542     PetscCall(ISGetLocalSize(iscol,&n));
3543     PetscCall(ISGetSize(iscol,&Ncols));
3544 
3545     /* (1) iscol -> nonscalable iscol_local */
3546     /* Check for special case: each processor gets entire matrix columns */
3547     PetscCall(ISIdentity(iscol_local,&flg));
3548     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3549     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3550     if (allcolumns) {
3551       iscol_sub = iscol_local;
3552       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3553       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3554 
3555     } else {
3556       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3557       PetscInt *idx,*cmap1,k;
3558       PetscCall(PetscMalloc1(Ncols,&idx));
3559       PetscCall(PetscMalloc1(Ncols,&cmap1));
3560       PetscCall(ISGetIndices(iscol_local,&is_idx));
3561       count = 0;
3562       k     = 0;
3563       for (i=0; i<Ncols; i++) {
3564         j = is_idx[i];
3565         if (j >= cstart && j < cend) {
3566           /* diagonal part of mat */
3567           idx[count]     = j;
3568           cmap1[count++] = i; /* column index in submat */
3569         } else if (Bn) {
3570           /* off-diagonal part of mat */
3571           if (j == garray[k]) {
3572             idx[count]     = j;
3573             cmap1[count++] = i;  /* column index in submat */
3574           } else if (j > garray[k]) {
3575             while (j > garray[k] && k < Bn-1) k++;
3576             if (j == garray[k]) {
3577               idx[count]     = j;
3578               cmap1[count++] = i; /* column index in submat */
3579             }
3580           }
3581         }
3582       }
3583       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3584 
3585       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3586       PetscCall(ISGetBlockSize(iscol,&cbs));
3587       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3588 
3589       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3590     }
3591 
3592     /* (3) Create sequential Msub */
3593     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3594   }
3595 
3596   PetscCall(ISGetLocalSize(iscol_sub,&count));
3597   aij  = (Mat_SeqAIJ*)(Msub)->data;
3598   ii   = aij->i;
3599   PetscCall(ISGetIndices(iscmap,&cmap));
3600 
3601   /*
3602       m - number of local rows
3603       Ncols - number of columns (same on all processors)
3604       rstart - first row in new global matrix generated
3605   */
3606   PetscCall(MatGetSize(Msub,&m,NULL));
3607 
3608   if (call == MAT_INITIAL_MATRIX) {
3609     /* (4) Create parallel newmat */
3610     PetscMPIInt    rank,size;
3611     PetscInt       csize;
3612 
3613     PetscCallMPI(MPI_Comm_size(comm,&size));
3614     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3615 
3616     /*
3617         Determine the number of non-zeros in the diagonal and off-diagonal
3618         portions of the matrix in order to do correct preallocation
3619     */
3620 
3621     /* first get start and end of "diagonal" columns */
3622     PetscCall(ISGetLocalSize(iscol,&csize));
3623     if (csize == PETSC_DECIDE) {
3624       PetscCall(ISGetSize(isrow,&mglobal));
3625       if (mglobal == Ncols) { /* square matrix */
3626         nlocal = m;
3627       } else {
3628         nlocal = Ncols/size + ((Ncols % size) > rank);
3629       }
3630     } else {
3631       nlocal = csize;
3632     }
3633     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3634     rstart = rend - nlocal;
3635     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3636 
3637     /* next, compute all the lengths */
3638     jj    = aij->j;
3639     PetscCall(PetscMalloc1(2*m+1,&dlens));
3640     olens = dlens + m;
3641     for (i=0; i<m; i++) {
3642       jend = ii[i+1] - ii[i];
3643       olen = 0;
3644       dlen = 0;
3645       for (j=0; j<jend; j++) {
3646         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3647         else dlen++;
3648         jj++;
3649       }
3650       olens[i] = olen;
3651       dlens[i] = dlen;
3652     }
3653 
3654     PetscCall(ISGetBlockSize(isrow,&bs));
3655     PetscCall(ISGetBlockSize(iscol,&cbs));
3656 
3657     PetscCall(MatCreate(comm,&M));
3658     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3659     PetscCall(MatSetBlockSizes(M,bs,cbs));
3660     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3661     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3662     PetscCall(PetscFree(dlens));
3663 
3664   } else { /* call == MAT_REUSE_MATRIX */
3665     M    = *newmat;
3666     PetscCall(MatGetLocalSize(M,&i,NULL));
3667     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3668     PetscCall(MatZeroEntries(M));
3669     /*
3670          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3671        rather than the slower MatSetValues().
3672     */
3673     M->was_assembled = PETSC_TRUE;
3674     M->assembled     = PETSC_FALSE;
3675   }
3676 
3677   /* (5) Set values of Msub to *newmat */
3678   PetscCall(PetscMalloc1(count,&colsub));
3679   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3680 
3681   jj   = aij->j;
3682   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3683   for (i=0; i<m; i++) {
3684     row = rstart + i;
3685     nz  = ii[i+1] - ii[i];
3686     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3687     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3688     jj += nz; aa += nz;
3689   }
3690   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3691   PetscCall(ISRestoreIndices(iscmap,&cmap));
3692 
3693   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3694   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3695 
3696   PetscCall(PetscFree(colsub));
3697 
3698   /* save Msub, iscol_sub and iscmap used in processor for next request */
3699   if (call == MAT_INITIAL_MATRIX) {
3700     *newmat = M;
3701     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3702     PetscCall(MatDestroy(&Msub));
3703 
3704     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3705     PetscCall(ISDestroy(&iscol_sub));
3706 
3707     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3708     PetscCall(ISDestroy(&iscmap));
3709 
3710     if (iscol_local) {
3711       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3712       PetscCall(ISDestroy(&iscol_local));
3713     }
3714   }
3715   PetscFunctionReturn(0);
3716 }
3717 
3718 /*
3719     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3720   on each process, and then the final result obtained by concatenating those local matrices.
3721   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3722 
3723   Note: This requires a sequential iscol with all indices.
3724 */
3725 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3726 {
3727   PetscMPIInt    rank,size;
3728   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3729   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3730   Mat            M,Mreuse;
3731   MatScalar      *aa,*vwork;
3732   MPI_Comm       comm;
3733   Mat_SeqAIJ     *aij;
3734   PetscBool      colflag,allcolumns=PETSC_FALSE;
3735 
3736   PetscFunctionBegin;
3737   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3738   PetscCallMPI(MPI_Comm_rank(comm,&rank));
3739   PetscCallMPI(MPI_Comm_size(comm,&size));
3740 
3741   /* Check for special case: each processor gets entire matrix columns */
3742   PetscCall(ISIdentity(iscol,&colflag));
3743   PetscCall(ISGetLocalSize(iscol,&n));
3744   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3745   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3746 
3747   if (call ==  MAT_REUSE_MATRIX) {
3748     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3749     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3750     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3751   } else {
3752     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3753   }
3754 
3755   /*
3756       m - number of local rows
3757       n - number of columns (same on all processors)
3758       rstart - first row in new global matrix generated
3759   */
3760   PetscCall(MatGetSize(Mreuse,&m,&n));
3761   PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
3762   if (call == MAT_INITIAL_MATRIX) {
3763     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3764     ii  = aij->i;
3765     jj  = aij->j;
3766 
3767     /*
3768         Determine the number of non-zeros in the diagonal and off-diagonal
3769         portions of the matrix in order to do correct preallocation
3770     */
3771 
3772     /* first get start and end of "diagonal" columns */
3773     if (csize == PETSC_DECIDE) {
3774       PetscCall(ISGetSize(isrow,&mglobal));
3775       if (mglobal == n) { /* square matrix */
3776         nlocal = m;
3777       } else {
3778         nlocal = n/size + ((n % size) > rank);
3779       }
3780     } else {
3781       nlocal = csize;
3782     }
3783     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3784     rstart = rend - nlocal;
3785     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3786 
3787     /* next, compute all the lengths */
3788     PetscCall(PetscMalloc1(2*m+1,&dlens));
3789     olens = dlens + m;
3790     for (i=0; i<m; i++) {
3791       jend = ii[i+1] - ii[i];
3792       olen = 0;
3793       dlen = 0;
3794       for (j=0; j<jend; j++) {
3795         if (*jj < rstart || *jj >= rend) olen++;
3796         else dlen++;
3797         jj++;
3798       }
3799       olens[i] = olen;
3800       dlens[i] = dlen;
3801     }
3802     PetscCall(MatCreate(comm,&M));
3803     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3804     PetscCall(MatSetBlockSizes(M,bs,cbs));
3805     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3806     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3807     PetscCall(PetscFree(dlens));
3808   } else {
3809     PetscInt ml,nl;
3810 
3811     M    = *newmat;
3812     PetscCall(MatGetLocalSize(M,&ml,&nl));
3813     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3814     PetscCall(MatZeroEntries(M));
3815     /*
3816          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3817        rather than the slower MatSetValues().
3818     */
3819     M->was_assembled = PETSC_TRUE;
3820     M->assembled     = PETSC_FALSE;
3821   }
3822   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
3823   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3824   ii   = aij->i;
3825   jj   = aij->j;
3826 
3827   /* trigger copy to CPU if needed */
3828   PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3829   for (i=0; i<m; i++) {
3830     row   = rstart + i;
3831     nz    = ii[i+1] - ii[i];
3832     cwork = jj; jj += nz;
3833     vwork = aa; aa += nz;
3834     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3835   }
3836   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3837 
3838   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3839   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3840   *newmat = M;
3841 
3842   /* save submatrix used in processor for next request */
3843   if (call ==  MAT_INITIAL_MATRIX) {
3844     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3845     PetscCall(MatDestroy(&Mreuse));
3846   }
3847   PetscFunctionReturn(0);
3848 }
3849 
3850 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3851 {
3852   PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
3853   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3854   const PetscInt *JJ;
3855   PetscBool      nooffprocentries;
3856   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;
3857 
3858   PetscFunctionBegin;
3859   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3860 
3861   PetscCall(PetscLayoutSetUp(B->rmap));
3862   PetscCall(PetscLayoutSetUp(B->cmap));
3863   m      = B->rmap->n;
3864   cstart = B->cmap->rstart;
3865   cend   = B->cmap->rend;
3866   rstart = B->rmap->rstart;
3867 
3868   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3869 
3870   if (PetscDefined(USE_DEBUG)) {
3871     for (i=0; i<m; i++) {
3872       nnz = Ii[i+1]- Ii[i];
3873       JJ  = J + Ii[i];
3874       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3875       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3876       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3877     }
3878   }
3879 
3880   for (i=0; i<m; i++) {
3881     nnz     = Ii[i+1]- Ii[i];
3882     JJ      = J + Ii[i];
3883     nnz_max = PetscMax(nnz_max,nnz);
3884     d       = 0;
3885     for (j=0; j<nnz; j++) {
3886       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3887     }
3888     d_nnz[i] = d;
3889     o_nnz[i] = nnz - d;
3890   }
3891   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3892   PetscCall(PetscFree2(d_nnz,o_nnz));
3893 
3894   for (i=0; i<m; i++) {
3895     ii   = i + rstart;
3896     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3897   }
3898   nooffprocentries    = B->nooffprocentries;
3899   B->nooffprocentries = PETSC_TRUE;
3900   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3901   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3902   B->nooffprocentries = nooffprocentries;
3903 
3904   /* count number of entries below block diagonal */
3905   PetscCall(PetscFree(Aij->ld));
3906   PetscCall(PetscCalloc1(m,&ld));
3907   Aij->ld = ld;
3908   for (i=0; i<m; i++) {
3909     nnz   = Ii[i+1] - Ii[i];
3910     j     = 0;
3911     while (j < nnz && J[j] < cstart) j++;
3912     ld[i] = j;
3913     J     += nnz;
3914   }
3915 
3916   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3917   PetscFunctionReturn(0);
3918 }
3919 
3920 /*@
3921    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3922    (the default parallel PETSc format).
3923 
3924    Collective
3925 
3926    Input Parameters:
3927 +  B - the matrix
3928 .  i - the indices into j for the start of each local row (starts with zero)
3929 .  j - the column indices for each local row (starts with zero)
3930 -  v - optional values in the matrix
3931 
3932    Level: developer
3933 
3934    Notes:
3935        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3936      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3937      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3938 
3939        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3940 
3941        The format used for the sparse matrix input is equivalent to a
3942     row-major ordering, i.e., for the following matrix the input data expected is
3943     as shown
3944 
3945 $        1 0 0
3946 $        2 0 3     P0
3947 $       -------
3948 $        4 5 6     P1
3949 $
3950 $     Process0 [P0]: rows_owned=[0,1]
3951 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3952 $        j =  {0,0,2}  [size = 3]
3953 $        v =  {1,2,3}  [size = 3]
3954 $
3955 $     Process1 [P1]: rows_owned=[2]
3956 $        i =  {0,3}    [size = nrow+1  = 1+1]
3957 $        j =  {0,1,2}  [size = 3]
3958 $        v =  {4,5,6}  [size = 3]
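
   A sketch of a typical calling sequence, where the local arrays i, j, and v hold the
   CSR data shown above (illustrative only):
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v);
   ...
   MatDestroy(&B);
.ve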
3959 
3960 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3961           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3962 @*/
3963 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3964 {
3965   PetscFunctionBegin;
3966   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3967   PetscFunctionReturn(0);
3968 }
3969 
3970 /*@C
3971    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3972    (the default parallel PETSc format).  For good matrix assembly performance
3973    the user should preallocate the matrix storage by setting the parameters
3974    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3975    performance can be increased by more than a factor of 50.
3976 
3977    Collective
3978 
3979    Input Parameters:
3980 +  B - the matrix
3981 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3982            (same value is used for all local rows)
3983 .  d_nnz - array containing the number of nonzeros in the various rows of the
3984            DIAGONAL portion of the local submatrix (possibly different for each row)
3985            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3986            The size of this array is equal to the number of local rows, i.e 'm'.
3987            For matrices that will be factored, you must leave room for (and set)
3988            the diagonal entry even if it is zero.
3989 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3990            submatrix (same value is used for all local rows).
3991 -  o_nnz - array containing the number of nonzeros in the various rows of the
3992            OFF-DIAGONAL portion of the local submatrix (possibly different for
3993            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3994            structure. The size of this array is equal to the number
3995            of local rows, i.e 'm'.
3996 
3997    If the *_nnz parameter is given then the *_nz parameter is ignored
3998 
3999    The AIJ format (also called the Yale sparse matrix format or
4000    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4001    storage.  The stored row and column indices begin with zero.
4002    See Users-Manual: ch_mat for details.
4003 
4004    The parallel matrix is partitioned such that the first m0 rows belong to
4005    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4006    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
4007 
4008    The DIAGONAL portion of the local submatrix of a processor can be defined
4009    as the submatrix which is obtained by extracting the part corresponding to
4010    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4011    first row that belongs to the processor, r2 is the last row belonging to
4012    this processor, and c1-c2 is the range of indices of the local part of a
4013    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4014    common case of a square matrix, the row and column ranges are the same and
4015    the DIAGONAL part is also square. The remaining portion of the local
4016    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4017 
4018    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4019 
4020    You can call MatGetInfo() to get information on how effective the preallocation was;
4021    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4022    You can also run with the option -info and look for messages with the string
4023    malloc in them to see if additional memory allocation was needed.
4024 
4025    Example usage:
4026 
4027    Consider the following 8x8 matrix with 34 non-zero values, that is
4028    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4029    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4030    as follows:
4031 
4032 .vb
4033             1  2  0  |  0  3  0  |  0  4
4034     Proc0   0  5  6  |  7  0  0  |  8  0
4035             9  0 10  | 11  0  0  | 12  0
4036     -------------------------------------
4037            13  0 14  | 15 16 17  |  0  0
4038     Proc1   0 18  0  | 19 20 21  |  0  0
4039             0  0  0  | 22 23  0  | 24  0
4040     -------------------------------------
4041     Proc2  25 26 27  |  0  0 28  | 29  0
4042            30  0  0  | 31 32 33  |  0 34
4043 .ve
4044 
4045    This can be represented as a collection of submatrices as:
4046 
4047 .vb
4048       A B C
4049       D E F
4050       G H I
4051 .ve
4052 
4053    Where the submatrices A,B,C are owned by proc0, D,E,F are
4054    owned by proc1, G,H,I are owned by proc2.
4055 
4056    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4057    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4058    The 'M','N' parameters are 8,8, and have the same values on all procs.
4059 
4060    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4061    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4062    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4063    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4064    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4065    matrix, and [DF] as another SeqAIJ matrix.
4066 
4067    When d_nz, o_nz parameters are specified, d_nz storage elements are
4068    allocated for every row of the local diagonal submatrix, and o_nz
4069    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4070    One way to choose d_nz and o_nz is to use the max nonzeros per local
4071    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4072    In this case, the values of d_nz,o_nz are:
4073 .vb
4074      proc0 : d_nz = 2, o_nz = 2
4075      proc1 : d_nz = 3, o_nz = 2
4076      proc2 : d_nz = 1, o_nz = 4
4077 .ve
4078    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4079    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4080    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4081    34 values.
4082 
4083    When d_nnz, o_nnz parameters are specified, the storage is specified
4084    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4085    In the above case the values for d_nnz,o_nnz are:
4086 .vb
4087      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4088      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4089      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4090 .ve
4091    Here the space allocated is the sum of all the above values, i.e., 34, and
4092    hence pre-allocation is perfect.
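
   For instance, on proc0 of the example above the per-row form could be sketched as
   (B is an already created MPIAIJ matrix; the values are the ones listed above):
.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve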
4093 
4094    Level: intermediate
4095 
4096 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4097           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4098 @*/
4099 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4100 {
4101   PetscFunctionBegin;
4102   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4103   PetscValidType(B,1);
4104   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4105   PetscFunctionReturn(0);
4106 }
4107 
4108 /*@
4109      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4110          in standard CSR format.
4111 
4112    Collective
4113 
4114    Input Parameters:
4115 +  comm - MPI communicator
4116 .  m - number of local rows (Cannot be PETSC_DECIDE)
4117 .  n - This value should be the same as the local size used in creating the
4118        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4119        calculated if N is given). For square matrices n is almost always m.
4120 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4121 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4122 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4123 .   j - column indices
4124 -   a - optional matrix values
4125 
4126    Output Parameter:
4127 .   mat - the matrix
4128 
4129    Level: intermediate
4130 
4131    Notes:
4132        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4133      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4134      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4135 
4136        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4137 
4138        The format used for the sparse matrix input is equivalent to a
4139     row-major ordering, i.e., for the following matrix the input data expected is
4140     as shown
4141 
4142        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4143 
4144 $        1 0 0
4145 $        2 0 3     P0
4146 $       -------
4147 $        4 5 6     P1
4148 $
4149 $     Process0 [P0]: rows_owned=[0,1]
4150 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4151 $        j =  {0,0,2}  [size = 3]
4152 $        v =  {1,2,3}  [size = 3]
4153 $
4154 $     Process1 [P1]: rows_owned=[2]
4155 $        i =  {0,3}    [size = nrow+1  = 1+1]
4156 $        j =  {0,1,2}  [size = 3]
4157 $        v =  {4,5,6}  [size = 3]
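
   A sketch of the corresponding call, where i, j, and a are the local CSR arrays shown above:
.vb
   Mat A;
   MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
   ...
   MatDestroy(&A);
.ve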
4158 
4159 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4160           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4161 @*/
4162 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4163 {
4164   PetscFunctionBegin;
4165   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4166   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4167   PetscCall(MatCreate(comm,mat));
4168   PetscCall(MatSetSizes(*mat,m,n,M,N));
4169   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4170   PetscCall(MatSetType(*mat,MATMPIAIJ));
4171   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4172   PetscFunctionReturn(0);
4173 }
4174 
4175 /*@
4176      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4177          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to MatCreateMPIAIJWithArrays()
4178 
4179      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4180 
4181    Collective
4182 
4183    Input Parameters:
4184 +  mat - the matrix
4185 .  m - number of local rows (Cannot be PETSC_DECIDE)
4186 .  n - This value should be the same as the local size used in creating the
4187        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4188        calculated if N is given). For square matrices n is almost always m.
4189 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4190 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4191 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4192 .  J - column indices
4193 -  v - matrix values
4194 
4195    Level: intermediate
4196 
4197 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4198           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4199 @*/
4200 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4201 {
4202   PetscInt       nnz,i;
4203   PetscBool      nooffprocentries;
4204   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4205   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4206   PetscScalar    *ad,*ao;
4207   PetscInt       ldi,Iii,md;
4208   const PetscInt *Adi = Ad->i;
4209   PetscInt       *ld = Aij->ld;
4210 
4211   PetscFunctionBegin;
4212   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4213   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4214   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4215   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4216 
4217   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4218   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4219 
4220   for (i=0; i<m; i++) {
4221     nnz  = Ii[i+1]- Ii[i];
4222     Iii  = Ii[i];
4223     ldi  = ld[i];
4224     md   = Adi[i+1]-Adi[i];
4225     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4226     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4227     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4228     ad  += md;
4229     ao  += nnz - md;
4230   }
4231   nooffprocentries      = mat->nooffprocentries;
4232   mat->nooffprocentries = PETSC_TRUE;
4233   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4234   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4235   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4236   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4237   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4238   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4239   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4240   mat->nooffprocentries = nooffprocentries;
4241   PetscFunctionReturn(0);
4242 }
4243 
4244 /*@
4245      MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values
4246 
4247    Collective
4248 
4249    Input Parameters:
4250 +  mat - the matrix
4251 -  v - matrix values, stored by row
4252 
4253    Level: intermediate
4254 
4255    Notes:
4256    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
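
   A usage sketch, where vnew holds the new numerical values in the same CSR ordering used at creation:
.vb
   MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,v,&A);
   ...
   MatUpdateMPIAIJWithArray(A,vnew);
.ve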
4257 
4258 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4259           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4260 @*/
4261 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
4262 {
4263   PetscInt       nnz,i,m;
4264   PetscBool      nooffprocentries;
4265   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4266   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4267   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4268   PetscScalar    *ad,*ao;
4269   const PetscInt *Adi = Ad->i,*Adj = Ao->i;
4270   PetscInt       ldi,Iii,md;
4271   PetscInt       *ld = Aij->ld;
4272 
4273   PetscFunctionBegin;
4274   m = mat->rmap->n;
4275 
4276   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4277   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4278   Iii = 0;
4279   for (i=0; i<m; i++) {
4280     nnz  = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];
4281     ldi  = ld[i];
4282     md   = Adi[i+1]-Adi[i];
4283     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4284     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4285     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4286     ad  += md;
4287     ao  += nnz - md;
4288     Iii += nnz;
4289   }
4290   nooffprocentries      = mat->nooffprocentries;
4291   mat->nooffprocentries = PETSC_TRUE;
4292   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4293   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4294   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4295   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4296   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4297   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4298   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4299   mat->nooffprocentries = nooffprocentries;
4300   PetscFunctionReturn(0);
4301 }
4302 
4303 /*@C
4304    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4305    (the default parallel PETSc format).  For good matrix assembly performance
4306    the user should preallocate the matrix storage by setting the parameters
4307    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4308    performance can be increased by more than a factor of 50.
4309 
4310    Collective
4311 
4312    Input Parameters:
4313 +  comm - MPI communicator
4314 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4315            This value should be the same as the local size used in creating the
4316            y vector for the matrix-vector product y = Ax.
4317 .  n - This value should be the same as the local size used in creating the
4318        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4319        calculated if N is given). For square matrices n is almost always m.
4320 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4321 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4322 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4323            (same value is used for all local rows)
4324 .  d_nnz - array containing the number of nonzeros in the various rows of the
4325            DIAGONAL portion of the local submatrix (possibly different for each row)
4326            or NULL, if d_nz is used to specify the nonzero structure.
4327            The size of this array is equal to the number of local rows, i.e 'm'.
4328 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4329            submatrix (same value is used for all local rows).
4330 -  o_nnz - array containing the number of nonzeros in the various rows of the
4331            OFF-DIAGONAL portion of the local submatrix (possibly different for
4332            each row) or NULL, if o_nz is used to specify the nonzero
4333            structure. The size of this array is equal to the number
4334            of local rows, i.e 'm'.
4335 
4336    Output Parameter:
4337 .  A - the matrix
4338 
4339    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4340    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4341    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4342 
4343    Notes:
4344    If the *_nnz parameter is given then the *_nz parameter is ignored
4345 
4346    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4347    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4348    storage requirements for this matrix.
4349 
4350    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4351    processor then it must be used on all processors that share the object for
4352    that argument.
4353 
4354    The user MUST specify either the local or global matrix dimensions
4355    (possibly both).
4356 
4357    The parallel matrix is partitioned across processors such that the
4358    first m0 rows belong to process 0, the next m1 rows belong to
4359    process 1, the next m2 rows belong to process 2 etc.. where
4360    process 1, the next m2 rows belong to process 2 etc., where
4361    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4362    values corresponding to an [m x N] submatrix.
4363    The columns are logically partitioned with the n0 columns belonging
4364    to 0th partition, the next n1 columns belonging to the next
4365    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4366 
4367    The DIAGONAL portion of the local submatrix on any given processor
4368    is the submatrix formed by the rows and columns m,n owned by
4369    the given processor, i.e., the diagonal matrix on
4370    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4371    etc. The remaining portion of the local submatrix [m x (N-n)]
4372    constitute the OFF-DIAGONAL portion. The example below better
4373    illustrates this concept.
4374 
4375    For a square global matrix we define each processor's diagonal portion
4376    to be its local rows and the corresponding columns (a square submatrix);
4377    each processor's off-diagonal portion encompasses the remainder of the
4378    local matrix (a rectangular submatrix).
4379 
4380    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4381 
4382    When calling this routine with a single process communicator, a matrix of
4383    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4384    type of communicator, use the construction mechanism
4385 .vb
4386      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4387 .ve
4393 
4394    By default, this format uses inodes (identical nodes) when possible.
4395    We search for consecutive rows with the same nonzero structure, thereby
4396    reusing matrix information to achieve increased efficiency.
4397 
4398    Options Database Keys:
4399 +  -mat_no_inode  - Do not use inodes
4400 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4401 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4402         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4403         Entry (i,j) is the size of the message (in bytes) that rank i sends to rank j in one MatMult() call.
4404 
4405    Example usage:
4406 
4407    Consider the following 8x8 matrix with 34 non-zero values, that is
4408    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4410    as follows
4411 
4412 .vb
4413             1  2  0  |  0  3  0  |  0  4
4414     Proc0   0  5  6  |  7  0  0  |  8  0
4415             9  0 10  | 11  0  0  | 12  0
4416     -------------------------------------
4417            13  0 14  | 15 16 17  |  0  0
4418     Proc1   0 18  0  | 19 20 21  |  0  0
4419             0  0  0  | 22 23  0  | 24  0
4420     -------------------------------------
4421     Proc2  25 26 27  |  0  0 28  | 29  0
4422            30  0  0  | 31 32 33  |  0 34
4423 .ve
4424 
4425    This can be represented as a collection of submatrices as
4426 
4427 .vb
4428       A B C
4429       D E F
4430       G H I
4431 .ve
4432 
4433    Where the submatrices A,B,C are owned by proc0, D,E,F are
4434    owned by proc1, G,H,I are owned by proc2.
4435 
4436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4438    The 'M','N' parameters are 8,8, and have the same values on all procs.
4439 
4440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4443    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4444    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4445    matrix, and [DF] as another SeqAIJ matrix.
4446 
4447    When d_nz, o_nz parameters are specified, d_nz storage elements are
4448    allocated for every row of the local diagonal submatrix, and o_nz
4449    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4450    One way to choose d_nz and o_nz is to use the max nonzeros per local
4451    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4452    In this case, the values of d_nz,o_nz are
4453 .vb
4454      proc0 : d_nz = 2, o_nz = 2
4455      proc1 : d_nz = 3, o_nz = 2
4456      proc2 : d_nz = 1, o_nz = 4
4457 .ve
4458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4460    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4461    34 values.
4462 
4463    When d_nnz, o_nnz parameters are specified, the storage is specified
4464    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4465    In the above case the values for d_nnz,o_nnz are
4466 .vb
4467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4470 .ve
4471    Here the space allocated is the sum of all the above values, i.e., 34, and
4472    hence pre-allocation is perfect.
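
   For the example above, the call on proc0 could be sketched with either form
   (illustrative values only):
.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   /* either the single-value form ... */
   MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);
   /* ... or the per-row form */
   MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve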
4473 
4474    Level: intermediate
4475 
4476 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4477           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4478 @*/
4479 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4480 {
4481   PetscMPIInt    size;
4482 
4483   PetscFunctionBegin;
4484   PetscCall(MatCreate(comm,A));
4485   PetscCall(MatSetSizes(*A,m,n,M,N));
4486   PetscCallMPI(MPI_Comm_size(comm,&size));
4487   if (size > 1) {
4488     PetscCall(MatSetType(*A,MATMPIAIJ));
4489     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4490   } else {
4491     PetscCall(MatSetType(*A,MATSEQAIJ));
4492     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4493   }
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 /*@C
4498   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4499 
4500   Not collective
4501 
4502   Input Parameter:
4503 . A - The MPIAIJ matrix
4504 
4505   Output Parameters:
4506 + Ad - The local diagonal block as a SeqAIJ matrix
4507 . Ao - The local off-diagonal block as a SeqAIJ matrix
4508 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4509 
4510   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4511   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4512   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4513   local column numbers to global column numbers in the original matrix.
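
  A minimal usage sketch:
.vb
  Mat Ad,Ao;
  const PetscInt *colmap;
  MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
  /* Ad, Ao, and colmap are internal to A and must not be destroyed or freed by the caller */
.ve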
4514 
4515   Level: intermediate
4516 
4517 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4518 @*/
4519 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4520 {
4521   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4522   PetscBool      flg;
4523 
4524   PetscFunctionBegin;
4525   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4526   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4527   if (Ad)     *Ad     = a->A;
4528   if (Ao)     *Ao     = a->B;
4529   if (colmap) *colmap = a->garray;
4530   PetscFunctionReturn(0);
4531 }
4532 
4533 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4534 {
4535   PetscInt       m,N,i,rstart,nnz,Ii;
4536   PetscInt       *indx;
4537   PetscScalar    *values;
4538   MatType        rootType;
4539 
4540   PetscFunctionBegin;
4541   PetscCall(MatGetSize(inmat,&m,&N));
4542   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4543     PetscInt       *dnz,*onz,sum,bs,cbs;
4544 
4545     if (n == PETSC_DECIDE) {
4546       PetscCall(PetscSplitOwnership(comm,&n,&N));
4547     }
4548     /* Check sum(n) = N */
4549     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4550     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4551 
4552     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4553     rstart -= m;
4554 
4555     MatPreallocateBegin(comm,m,n,dnz,onz);
4556     for (i=0; i<m; i++) {
4557       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4558       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4559       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4560     }
4561 
4562     PetscCall(MatCreate(comm,outmat));
4563     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4564     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4565     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4566     PetscCall(MatGetRootType_Private(inmat,&rootType));
4567     PetscCall(MatSetType(*outmat,rootType));
4568     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4569     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4570     MatPreallocateEnd(dnz,onz);
4571     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4572   }
4573 
4574   /* numeric phase */
4575   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4576   for (i=0; i<m; i++) {
4577     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4578     Ii   = i + rstart;
4579     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4580     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4581   }
4582   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4583   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4588 {
4589   PetscMPIInt       rank;
4590   PetscInt          m,N,i,rstart,nnz;
4591   size_t            len;
4592   const PetscInt    *indx;
4593   PetscViewer       out;
4594   char              *name;
4595   Mat               B;
4596   const PetscScalar *values;
4597 
4598   PetscFunctionBegin;
4599   PetscCall(MatGetLocalSize(A,&m,NULL));
4600   PetscCall(MatGetSize(A,NULL,&N));
4601   /* Should this be the type of the diagonal block of A? */
4602   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4603   PetscCall(MatSetSizes(B,m,N,m,N));
4604   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4605   PetscCall(MatSetType(B,MATSEQAIJ));
4606   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4607   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4608   for (i=0; i<m; i++) {
4609     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4610     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4611     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4612   }
4613   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4614   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4615 
4616   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4617   PetscCall(PetscStrlen(outfile,&len));
4618   PetscCall(PetscMalloc1(len+6,&name));
4619   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4620   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4621   PetscCall(PetscFree(name));
4622   PetscCall(MatView(B,out));
4623   PetscCall(PetscViewerDestroy(&out));
4624   PetscCall(MatDestroy(&B));
4625   PetscFunctionReturn(0);
4626 }
4627 
4628 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4629 {
4630   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4631 
4632   PetscFunctionBegin;
4633   if (!merge) PetscFunctionReturn(0);
4634   PetscCall(PetscFree(merge->id_r));
4635   PetscCall(PetscFree(merge->len_s));
4636   PetscCall(PetscFree(merge->len_r));
4637   PetscCall(PetscFree(merge->bi));
4638   PetscCall(PetscFree(merge->bj));
4639   PetscCall(PetscFree(merge->buf_ri[0]));
4640   PetscCall(PetscFree(merge->buf_ri));
4641   PetscCall(PetscFree(merge->buf_rj[0]));
4642   PetscCall(PetscFree(merge->buf_rj));
4643   PetscCall(PetscFree(merge->coi));
4644   PetscCall(PetscFree(merge->coj));
4645   PetscCall(PetscFree(merge->owners_co));
4646   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4647   PetscCall(PetscFree(merge));
4648   PetscFunctionReturn(0);
4649 }
4650 
4651 #include <../src/mat/utils/freespace.h>
4652 #include <petscbt.h>
4653 
4654 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4655 {
4656   MPI_Comm            comm;
4657   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4658   PetscMPIInt         size,rank,taga,*len_s;
4659   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4660   PetscInt            proc,m;
4661   PetscInt            **buf_ri,**buf_rj;
4662   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4663   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4664   MPI_Request         *s_waits,*r_waits;
4665   MPI_Status          *status;
4666   const MatScalar     *aa,*a_a;
4667   MatScalar           **abuf_r,*ba_i;
4668   Mat_Merge_SeqsToMPI *merge;
4669   PetscContainer      container;
4670 
4671   PetscFunctionBegin;
4672   PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
4673   PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));
4674 
4675   PetscCallMPI(MPI_Comm_size(comm,&size));
4676   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4677 
4678   PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
4679   PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4680   PetscCall(PetscContainerGetPointer(container,(void**)&merge));
4681   PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
4682   aa   = a_a;
4683 
4684   bi     = merge->bi;
4685   bj     = merge->bj;
4686   buf_ri = merge->buf_ri;
4687   buf_rj = merge->buf_rj;
4688 
4689   PetscCall(PetscMalloc1(size,&status));
4690   owners = merge->rowmap->range;
4691   len_s  = merge->len_s;
4692 
4693   /* send and recv matrix values */
4694   /*-----------------------------*/
4695   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
4696   PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));
4697 
4698   PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
4699   for (proc=0,k=0; proc<size; proc++) {
4700     if (!len_s[proc]) continue;
4701     i    = owners[proc];
4702     PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
4703     k++;
4704   }
4705 
4706   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
4707   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
4708   PetscCall(PetscFree(status));
4709 
4710   PetscCall(PetscFree(s_waits));
4711   PetscCall(PetscFree(r_waits));
4712 
4713   /* insert mat values of mpimat */
4714   /*----------------------------*/
4715   PetscCall(PetscMalloc1(N,&ba_i));
4716   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4717 
4718   for (k=0; k<merge->nrecv; k++) {
4719     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4720     nrows       = *(buf_ri_k[k]);
4721     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4722     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4723   }
4724 
4725   /* set values of ba */
4726   m    = merge->rowmap->n;
4727   for (i=0; i<m; i++) {
4728     arow = owners[rank] + i;
4729     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4730     bnzi = bi[i+1] - bi[i];
4731     PetscCall(PetscArrayzero(ba_i,bnzi));
4732 
4733     /* add local non-zero vals of this proc's seqmat into ba */
4734     anzi   = ai[arow+1] - ai[arow];
4735     aj     = a->j + ai[arow];
4736     aa     = a_a + ai[arow];
4737     nextaj = 0;
4738     for (j=0; nextaj<anzi; j++) {
4739       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4740         ba_i[j] += aa[nextaj++];
4741       }
4742     }
4743 
4744     /* add received vals into ba */
4745     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4746       /* i-th row */
4747       if (i == *nextrow[k]) {
4748         anzi   = *(nextai[k]+1) - *nextai[k];
4749         aj     = buf_rj[k] + *(nextai[k]);
4750         aa     = abuf_r[k] + *(nextai[k]);
4751         nextaj = 0;
4752         for (j=0; nextaj<anzi; j++) {
4753           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4754             ba_i[j] += aa[nextaj++];
4755           }
4756         }
4757         nextrow[k]++; nextai[k]++;
4758       }
4759     }
4760     PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
4761   }
4762   PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
4763   PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
4764   PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));
4765 
4766   PetscCall(PetscFree(abuf_r[0]));
4767   PetscCall(PetscFree(abuf_r));
4768   PetscCall(PetscFree(ba_i));
4769   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4770   PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
4771   PetscFunctionReturn(0);
4772 }
4773 
4774 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4775 {
4776   Mat                 B_mpi;
4777   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4778   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4779   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4780   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4781   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4782   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi;
4783   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4784   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4785   MPI_Status          *status;
4786   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4787   PetscBT             lnkbt;
4788   Mat_Merge_SeqsToMPI *merge;
4789   PetscContainer      container;
4790 
4791   PetscFunctionBegin;
4792   PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));
4793 
4794   /* make sure it is a PETSc comm */
4795   PetscCall(PetscCommDuplicate(comm,&comm,NULL));
4796   PetscCallMPI(MPI_Comm_size(comm,&size));
4797   PetscCallMPI(MPI_Comm_rank(comm,&rank));
4798 
4799   PetscCall(PetscNew(&merge));
4800   PetscCall(PetscMalloc1(size,&status));
4801 
4802   /* determine row ownership */
4803   /*---------------------------------------------------------*/
4804   PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
4805   PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
4806   PetscCall(PetscLayoutSetSize(merge->rowmap,M));
4807   PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
4808   PetscCall(PetscLayoutSetUp(merge->rowmap));
4809   PetscCall(PetscMalloc1(size,&len_si));
4810   PetscCall(PetscMalloc1(size,&merge->len_s));
4811 
4812   m      = merge->rowmap->n;
4813   owners = merge->rowmap->range;
4814 
4815   /* determine the number of messages to send, their lengths */
4816   /*---------------------------------------------------------*/
4817   len_s = merge->len_s;
4818 
4819   len          = 0; /* length of buf_si[] */
4820   merge->nsend = 0;
4821   for (proc=0; proc<size; proc++) {
4822     len_si[proc] = 0;
4823     if (proc == rank) {
4824       len_s[proc] = 0;
4825     } else {
4826       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4827       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4828     }
4829     if (len_s[proc]) {
4830       merge->nsend++;
4831       nrows = 0;
4832       for (i=owners[proc]; i<owners[proc+1]; i++) {
4833         if (ai[i+1] > ai[i]) nrows++;
4834       }
4835       len_si[proc] = 2*(nrows+1);
4836       len         += len_si[proc];
4837     }
4838   }
4839 
4840   /* determine the number and length of messages to receive for ij-structure */
4841   /*-------------------------------------------------------------------------*/
4842   PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
4843   PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));
4844 
4845   /* post the Irecv of j-structure */
4846   /*-------------------------------*/
4847   PetscCall(PetscCommGetNewTag(comm,&tagj));
4848   PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));
4849 
4850   /* post the Isend of j-structure */
4851   /*--------------------------------*/
4852   PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));
4853 
4854   for (proc=0, k=0; proc<size; proc++) {
4855     if (!len_s[proc]) continue;
4856     i    = owners[proc];
4857     PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
4858     k++;
4859   }
4860 
4861   /* receives and sends of j-structure are complete */
4862   /*------------------------------------------------*/
4863   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
4864   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));
4865 
4866   /* send and recv i-structure */
4867   /*---------------------------*/
4868   PetscCall(PetscCommGetNewTag(comm,&tagi));
4869   PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));
4870 
4871   PetscCall(PetscMalloc1(len+1,&buf_s));
4872   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4873   for (proc=0,k=0; proc<size; proc++) {
4874     if (!len_s[proc]) continue;
4875     /* form outgoing message for i-structure:
4876          buf_si[0]:                 nrows to be sent
4877                [1:nrows]:           row index (global)
4878                [nrows+1:2*nrows+1]: i-structure index
4879     */
4880     /*-------------------------------------------*/
4881     nrows       = len_si[proc]/2 - 1;
4882     buf_si_i    = buf_si + nrows+1;
4883     buf_si[0]   = nrows;
4884     buf_si_i[0] = 0;
4885     nrows       = 0;
4886     for (i=owners[proc]; i<owners[proc+1]; i++) {
4887       anzi = ai[i+1] - ai[i];
4888       if (anzi) {
4889         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4890         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4891         nrows++;
4892       }
4893     }
4894     PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
4895     k++;
4896     buf_si += len_si[proc];
4897   }
4898 
4899   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
4900   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));
4901 
4902   PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
4903   for (i=0; i<merge->nrecv; i++) {
4904     PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
4905   }
4906 
4907   PetscCall(PetscFree(len_si));
4908   PetscCall(PetscFree(len_ri));
4909   PetscCall(PetscFree(rj_waits));
4910   PetscCall(PetscFree2(si_waits,sj_waits));
4911   PetscCall(PetscFree(ri_waits));
4912   PetscCall(PetscFree(buf_s));
4913   PetscCall(PetscFree(status));
4914 
4915   /* compute a local seq matrix in each processor */
4916   /*----------------------------------------------*/
4917   /* allocate bi array and free space for accumulating nonzero column info */
4918   PetscCall(PetscMalloc1(m+1,&bi));
4919   bi[0] = 0;
4920 
4921   /* create and initialize a linked list */
4922   nlnk = N+1;
4923   PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));
4924 
4925   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4926   len  = ai[owners[rank+1]] - ai[owners[rank]];
4927   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));
4928 
4929   current_space = free_space;
4930 
4931   /* determine symbolic info for each local row */
4932   PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));
4933 
4934   for (k=0; k<merge->nrecv; k++) {
4935     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4936     nrows       = *buf_ri_k[k];
4937     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4938     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4939   }
4940 
4941   MatPreallocateBegin(comm,m,n,dnz,onz);
4942   len  = 0;
4943   for (i=0; i<m; i++) {
4944     bnzi = 0;
4945     /* add local non-zero cols of this proc's seqmat into lnk */
4946     arow  = owners[rank] + i;
4947     anzi  = ai[arow+1] - ai[arow];
4948     aj    = a->j + ai[arow];
4949     PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4950     bnzi += nlnk;
4951     /* add received col data into lnk */
4952     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4953       if (i == *nextrow[k]) { /* i-th row */
4954         anzi  = *(nextai[k]+1) - *nextai[k];
4955         aj    = buf_rj[k] + *nextai[k];
4956         PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
4957         bnzi += nlnk;
4958         nextrow[k]++; nextai[k]++;
4959       }
4960     }
4961     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4962 
4963     /* if free space is not available, make more free space */
4964     if (current_space->local_remaining<bnzi) {
4965       PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
4966     }
4967     /* copy data into free space, then initialize lnk */
4968     PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
4969     PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));
4970 
4971     current_space->array           += bnzi;
4972     current_space->local_used      += bnzi;
4973     current_space->local_remaining -= bnzi;
4974 
4975     bi[i+1] = bi[i] + bnzi;
4976   }
4977 
4978   PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
4979 
4980   PetscCall(PetscMalloc1(bi[m]+1,&bj));
4981   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
4982   PetscCall(PetscLLDestroy(lnk,lnkbt));
4983 
4984   /* create symbolic parallel matrix B_mpi */
4985   /*---------------------------------------*/
4986   PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
4987   PetscCall(MatCreate(comm,&B_mpi));
4988   if (n==PETSC_DECIDE) {
4989     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
4990   } else {
4991     PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4992   }
4993   PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
4994   PetscCall(MatSetType(B_mpi,MATMPIAIJ));
4995   PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
4996   MatPreallocateEnd(dnz,onz);
4997   PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
4998 
4999   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5000   B_mpi->assembled  = PETSC_FALSE;
5001   merge->bi         = bi;
5002   merge->bj         = bj;
5003   merge->buf_ri     = buf_ri;
5004   merge->buf_rj     = buf_rj;
5005   merge->coi        = NULL;
5006   merge->coj        = NULL;
5007   merge->owners_co  = NULL;
5008 
5009   PetscCall(PetscCommDestroy(&comm));
5010 
5011   /* attach the supporting struct to B_mpi for reuse */
5012   PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
5013   PetscCall(PetscContainerSetPointer(container,merge));
5014   PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
5015   PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
5016   PetscCall(PetscContainerDestroy(&container));
5017   *mpimat = B_mpi;
5018 
5019   PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 /*@C
5024       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5025                  matrices from each processor
5026 
5027     Collective
5028 
5029    Input Parameters:
5030 +    comm - the communicator the parallel matrix will live on
5031 .    seqmat - the input sequential matrix on each process
5032 .    m - number of local rows (or PETSC_DECIDE)
5033 .    n - number of local columns (or PETSC_DECIDE)
5034 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5035 
5036    Output Parameter:
5037 .    mpimat - the parallel matrix generated
5038 
5039     Level: advanced
5040 
5041    Notes:
5042      The dimensions of the sequential matrix in each processor MUST be the same.
5043      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5044      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
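
     A usage sketch, where each process holds a sequential matrix seqmat of the same global size:
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* after the values (but not the nonzero pattern) of seqmat change */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve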
5045 @*/
5046 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5047 {
5048   PetscMPIInt    size;
5049 
5050   PetscFunctionBegin;
5051   PetscCallMPI(MPI_Comm_size(comm,&size));
5052   if (size == 1) {
5053     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5054     if (scall == MAT_INITIAL_MATRIX) {
5055       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
5056     } else {
5057       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
5058     }
5059     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5060     PetscFunctionReturn(0);
5061   }
5062   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5063   if (scall == MAT_INITIAL_MATRIX) {
5064     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
5065   }
5066   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
5067   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5068   PetscFunctionReturn(0);
5069 }
5070 
5071 /*@
5072      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5073           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5074           with MatGetSize().
5075 
5076     Not Collective
5077 
5078    Input Parameter:
5079 .    A - the matrix
5081 
5082    Output Parameter:
5083 .    A_loc - the local sequential matrix generated
5084 
5085     Level: developer
5086 
5087    Notes:
5088      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5089 
5090      Destroy the matrix with MatDestroy()
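
     A usage sketch:
.vb
     Mat Aloc;
     MatAIJGetLocalMat(A,&Aloc);
     /* ... use Aloc ... */
     MatDestroy(&Aloc);
.ve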
5091 
5092 .seealso: `MatMPIAIJGetLocalMat()`
5093 
5094 @*/
5095 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5096 {
5097   PetscBool      mpi;
5098 
5099   PetscFunctionBegin;
5100   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5101   if (mpi) {
5102     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5103   } else {
5104     *A_loc = A;
5105     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5106   }
5107   PetscFunctionReturn(0);
5108 }
5109 
5110 /*@
5111      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5112           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5113           with MatGetSize().
5114 
5115     Not Collective
5116 
5117    Input Parameters:
5118 +    A - the matrix
5119 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5120 
5121    Output Parameter:
5122 .    A_loc - the local sequential matrix generated
5123 
5124     Level: developer
5125 
5126    Notes:
5127      In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix.
5128 
5129      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5130      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5131      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5132      modify the values of the returned A_loc.
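
     A usage sketch that also refreshes the local matrix after the values of A change:
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... use Aloc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve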
5133 
5134 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5135 @*/
5136 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5137 {
5138   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5139   Mat_SeqAIJ        *mat,*a,*b;
5140   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5141   const PetscScalar *aa,*ba,*aav,*bav;
5142   PetscScalar       *ca,*cam;
5143   PetscMPIInt       size;
5144   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5145   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5146   PetscBool         match;
5147 
5148   PetscFunctionBegin;
5149   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
5150   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5151   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5152   if (size == 1) {
5153     if (scall == MAT_INITIAL_MATRIX) {
5154       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5155       *A_loc = mpimat->A;
5156     } else if (scall == MAT_REUSE_MATRIX) {
5157       PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
5158     }
5159     PetscFunctionReturn(0);
5160   }
5161 
5162   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5163   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5164   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5165   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5166   PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
5167   PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
5168   aa   = aav;
5169   ba   = bav;
5170   if (scall == MAT_INITIAL_MATRIX) {
5171     PetscCall(PetscMalloc1(1+am,&ci));
5172     ci[0] = 0;
5173     for (i=0; i<am; i++) {
5174       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5175     }
5176     PetscCall(PetscMalloc1(1+ci[am],&cj));
5177     PetscCall(PetscMalloc1(1+ci[am],&ca));
5178     k    = 0;
5179     for (i=0; i<am; i++) {
5180       ncols_o = bi[i+1] - bi[i];
5181       ncols_d = ai[i+1] - ai[i];
5182       /* off-diagonal portion of A */
5183       for (jo=0; jo<ncols_o; jo++) {
5184         col = cmap[*bj];
5185         if (col >= cstart) break;
5186         cj[k]   = col; bj++;
5187         ca[k++] = *ba++;
5188       }
5189       /* diagonal portion of A */
5190       for (j=0; j<ncols_d; j++) {
5191         cj[k]   = cstart + *aj++;
5192         ca[k++] = *aa++;
5193       }
5194       /* off-diagonal portion of A */
5195       for (j=jo; j<ncols_o; j++) {
5196         cj[k]   = cmap[*bj++];
5197         ca[k++] = *ba++;
5198       }
5199     }
5200     /* put together the new matrix */
5201     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
5202     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5203     /* Since these are PETSc arrays, change flags to free them as necessary. */
5204     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5205     mat->free_a  = PETSC_TRUE;
5206     mat->free_ij = PETSC_TRUE;
5207     mat->nonew   = 0;
5208   } else if (scall == MAT_REUSE_MATRIX) {
5209     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5210     ci   = mat->i;
5211     cj   = mat->j;
5212     PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
5213     for (i=0; i<am; i++) {
5214       /* off-diagonal portion of A */
5215       ncols_o = bi[i+1] - bi[i];
5216       for (jo=0; jo<ncols_o; jo++) {
5217         col = cmap[*bj];
5218         if (col >= cstart) break;
5219         *cam++ = *ba++; bj++;
5220       }
5221       /* diagonal portion of A */
5222       ncols_d = ai[i+1] - ai[i];
5223       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5224       /* off-diagonal portion of A */
5225       for (j=jo; j<ncols_o; j++) {
5226         *cam++ = *ba++; bj++;
5227       }
5228     }
5229     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
5230   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5231   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
5232   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
5233   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5234   PetscFunctionReturn(0);
5235 }
5236 
5237 /*@
5238      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5239           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5240 
5241     Not Collective
5242 
5243    Input Parameters:
5244 +    A - the matrix
5245 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5246 
5247    Output Parameters:
5248 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5249 -    A_loc - the local sequential matrix generated
5250 
5251     Level: developer
5252 
5253    Notes:
5254      This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5255 
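   Example usage (a minimal sketch with illustrative variable names; A is assumed to be an assembled MATMPIAIJ matrix):
.vb
   Mat A_loc;
   IS  glob;
   PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc));
   ... local column j of A_loc corresponds to the global column glob[j] of A ...
   PetscCall(ISDestroy(&glob));
   PetscCall(MatDestroy(&A_loc));
.ve
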
5256 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5257 
5258 @*/
5259 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5260 {
5261   Mat            Ao,Ad;
5262   const PetscInt *cmap;
5263   PetscMPIInt    size;
5264   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5265 
5266   PetscFunctionBegin;
5267   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5268   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5269   if (size == 1) {
5270     if (scall == MAT_INITIAL_MATRIX) {
5271       PetscCall(PetscObjectReference((PetscObject)Ad));
5272       *A_loc = Ad;
5273     } else if (scall == MAT_REUSE_MATRIX) {
5274       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5275     }
5276     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5277     PetscFunctionReturn(0);
5278   }
5279   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5280   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5281   if (f) {
5282     PetscCall((*f)(A,scall,glob,A_loc));
5283   } else {
5284     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5285     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5286     Mat_SeqAIJ        *c;
5287     PetscInt          *ai = a->i, *aj = a->j;
5288     PetscInt          *bi = b->i, *bj = b->j;
5289     PetscInt          *ci,*cj;
5290     const PetscScalar *aa,*ba;
5291     PetscScalar       *ca;
5292     PetscInt          i,j,am,dn,on;
5293 
5294     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5295     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5296     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5297     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5298     if (scall == MAT_INITIAL_MATRIX) {
5299       PetscInt k;
5300       PetscCall(PetscMalloc1(1+am,&ci));
5301       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5302       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5303       ci[0] = 0;
5304       for (i=0,k=0; i<am; i++) {
5305         const PetscInt ncols_o = bi[i+1] - bi[i];
5306         const PetscInt ncols_d = ai[i+1] - ai[i];
5307         ci[i+1] = ci[i] + ncols_o + ncols_d;
5308         /* diagonal portion of A */
5309         for (j=0; j<ncols_d; j++,k++) {
5310           cj[k] = *aj++;
5311           ca[k] = *aa++;
5312         }
5313         /* off-diagonal portion of A */
5314         for (j=0; j<ncols_o; j++,k++) {
5315           cj[k] = dn + *bj++;
5316           ca[k] = *ba++;
5317         }
5318       }
5319       /* put together the new matrix */
5320       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5321       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5322       /* Since these are PETSc arrays, change flags to free them as necessary. */
5323       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5324       c->free_a  = PETSC_TRUE;
5325       c->free_ij = PETSC_TRUE;
5326       c->nonew   = 0;
5327       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5328     } else if (scall == MAT_REUSE_MATRIX) {
5329       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5330       for (i=0; i<am; i++) {
5331         const PetscInt ncols_d = ai[i+1] - ai[i];
5332         const PetscInt ncols_o = bi[i+1] - bi[i];
5333         /* diagonal portion of A */
5334         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5335         /* off-diagonal portion of A */
5336         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5337       }
5338       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5339     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5340     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5341     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5342     if (glob) {
5343       PetscInt cst, *gidx;
5344 
5345       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5346       PetscCall(PetscMalloc1(dn+on,&gidx));
5347       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5348       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5349       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5350     }
5351   }
5352   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5353   PetscFunctionReturn(0);
5354 }
5355 
5356 /*@C
5357      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5358 
5359     Not Collective
5360 
5361    Input Parameters:
5362 +    A - the matrix
5363 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5364 -    row, col - index sets of rows and columns to extract (or NULL)
5365 
5366    Output Parameter:
5367 .    A_loc - the local sequential matrix generated
5368 
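   Example usage (a minimal sketch with illustrative variable names; A is assumed to be an assembled MATMPIAIJ matrix):
.vb
   Mat A_cond;
   PetscCall(MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_cond));
   ... A_cond contains the local rows of A restricted to the columns holding nonzeros ...
   PetscCall(MatDestroy(&A_cond));
.ve
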
5369     Level: developer
5370 
5371 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5372 
5373 @*/
5374 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5375 {
5376   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5377   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5378   IS             isrowa,iscola;
5379   Mat            *aloc;
5380   PetscBool      match;
5381 
5382   PetscFunctionBegin;
5383   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5384   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5385   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5386   if (!row) {
5387     start = A->rmap->rstart; end = A->rmap->rend;
5388     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5389   } else {
5390     isrowa = *row;
5391   }
5392   if (!col) {
5393     start = A->cmap->rstart;
5394     cmap  = a->garray;
5395     nzA   = a->A->cmap->n;
5396     nzB   = a->B->cmap->n;
5397     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5398     ncols = 0;
5399     for (i=0; i<nzB; i++) {
5400       if (cmap[i] < start) idx[ncols++] = cmap[i];
5401       else break;
5402     }
5403     imark = i;
5404     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5405     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5406     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5407   } else {
5408     iscola = *col;
5409   }
5410   if (scall != MAT_INITIAL_MATRIX) {
5411     PetscCall(PetscMalloc1(1,&aloc));
5412     aloc[0] = *A_loc;
5413   }
5414   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5415   if (!col) { /* attach global id of condensed columns */
5416     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5417   }
5418   *A_loc = aloc[0];
5419   PetscCall(PetscFree(aloc));
5420   if (!row) {
5421     PetscCall(ISDestroy(&isrowa));
5422   }
5423   if (!col) {
5424     PetscCall(ISDestroy(&iscola));
5425   }
5426   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5427   PetscFunctionReturn(0);
5428 }
5429 
5430 /*
5431  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5432  * Rows could be local or remote. The routine is designed to be memory scalable, so that nothing depends
5433  * on the global problem size.
5434  * */
5435 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5436 {
5437   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5438   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5439   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5440   PetscMPIInt              owner;
5441   PetscSFNode              *iremote,*oiremote;
5442   const PetscInt           *lrowindices;
5443   PetscSF                  sf,osf;
5444   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5445   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5446   MPI_Comm                 comm;
5447   ISLocalToGlobalMapping   mapping;
5448   const PetscScalar        *pd_a,*po_a;
5449 
5450   PetscFunctionBegin;
5451   PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
5452   /* plocalsize is the number of roots
5453    * nrows is the number of leaves
5454    * */
5455   PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
5456   PetscCall(ISGetLocalSize(rows,&nrows));
5457   PetscCall(PetscCalloc1(nrows,&iremote));
5458   PetscCall(ISGetIndices(rows,&lrowindices));
5459   for (i=0;i<nrows;i++) {
5460     /* Find a remote index and an owner for a row
5461      * The row could be local or remote
5462      * */
5463     owner = 0;
5464     lidx  = 0;
5465     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
5466     iremote[i].index = lidx;
5467     iremote[i].rank  = owner;
5468   }
5469   /* Create SF to communicate how many nonzero columns for each row */
5470   PetscCall(PetscSFCreate(comm,&sf));
5471   /* SF will figure out the number of nonzero columns for each row, and their
5472    * offsets
5473    * */
5474   PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5475   PetscCall(PetscSFSetFromOptions(sf));
5476   PetscCall(PetscSFSetUp(sf));
5477 
5478   PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
5479   PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
5480   PetscCall(PetscCalloc1(nrows,&pnnz));
5481   roffsets[0] = 0;
5482   roffsets[1] = 0;
5483   for (i=0;i<plocalsize;i++) {
5484     /* diag */
5485     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5486     /* off diag */
5487     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5488     /* compute offsets so that we know the relative location of each row */
5489     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5490     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5491   }
5492   PetscCall(PetscCalloc1(2*nrows,&nlcols));
5493   PetscCall(PetscCalloc1(2*nrows,&loffsets));
5494   /* 'r' means root, and 'l' means leaf */
5495   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5496   PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5497   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
5498   PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
5499   PetscCall(PetscSFDestroy(&sf));
5500   PetscCall(PetscFree(roffsets));
5501   PetscCall(PetscFree(nrcols));
5502   dntotalcols = 0;
5503   ontotalcols = 0;
5504   ncol = 0;
5505   for (i=0;i<nrows;i++) {
5506     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5507     ncol = PetscMax(pnnz[i],ncol);
5508     /* diag */
5509     dntotalcols += nlcols[i*2+0];
5510     /* off diag */
5511     ontotalcols += nlcols[i*2+1];
5512   }
5513   /* We do not need to figure out the exact number of columns
5514    * since all the calculations will be done by going through the raw data
5515    * */
5516   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
5517   PetscCall(MatSetUp(*P_oth));
5518   PetscCall(PetscFree(pnnz));
5519   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5520   /* diag */
5521   PetscCall(PetscCalloc1(dntotalcols,&iremote));
5522   /* off diag */
5523   PetscCall(PetscCalloc1(ontotalcols,&oiremote));
5524   /* diag */
5525   PetscCall(PetscCalloc1(dntotalcols,&ilocal));
5526   /* off diag */
5527   PetscCall(PetscCalloc1(ontotalcols,&oilocal));
5528   dntotalcols = 0;
5529   ontotalcols = 0;
5530   ntotalcols  = 0;
5531   for (i=0;i<nrows;i++) {
5532     owner = 0;
5533     PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
5534     /* Set iremote for diag matrix */
5535     for (j=0;j<nlcols[i*2+0];j++) {
5536       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5537       iremote[dntotalcols].rank    = owner;
5538       /* P_oth is SeqAIJ so ilocal needs to point to the first part of memory */
5539       ilocal[dntotalcols++]        = ntotalcols++;
5540     }
5541     /* off diag */
5542     for (j=0;j<nlcols[i*2+1];j++) {
5543       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5544       oiremote[ontotalcols].rank    = owner;
5545       oilocal[ontotalcols++]        = ntotalcols++;
5546     }
5547   }
5548   PetscCall(ISRestoreIndices(rows,&lrowindices));
5549   PetscCall(PetscFree(loffsets));
5550   PetscCall(PetscFree(nlcols));
5551   PetscCall(PetscSFCreate(comm,&sf));
5552   /* P serves as roots and P_oth is leaves
5553    * Diag matrix
5554    * */
5555   PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
5556   PetscCall(PetscSFSetFromOptions(sf));
5557   PetscCall(PetscSFSetUp(sf));
5558 
5559   PetscCall(PetscSFCreate(comm,&osf));
5560   /* Off diag */
5561   PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
5562   PetscCall(PetscSFSetFromOptions(osf));
5563   PetscCall(PetscSFSetUp(osf));
5564   PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5565   PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5566   /* We operate on the matrix internal data for saving memory */
5567   PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5568   PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5569   PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
5570   /* Convert to global indices for diag matrix */
5571   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5572   PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5573   /* We want P_oth to store global indices */
5574   PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
5575   /* Use memory scalable approach */
5576   PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
5577   PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
5578   PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5579   PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
5580   /* Convert back to local indices */
5581   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5582   PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
5583   nout = 0;
5584   PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
5585   PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5586   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5587   /* Exchange values */
5588   PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5589   PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5590   PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5591   PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5592   /* Stop PETSc from shrinking memory */
5593   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5594   PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
5595   PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
5596   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5597   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
5598   PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
5599   PetscCall(PetscSFDestroy(&sf));
5600   PetscCall(PetscSFDestroy(&osf));
5601   PetscFunctionReturn(0);
5602 }
5603 
5604 /*
5605  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5606  * This supports MPIAIJ and MAIJ
5607  * */
5608 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5609 {
5610   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5611   Mat_SeqAIJ            *p_oth;
5612   IS                    rows,map;
5613   PetscHMapI            hamp;
5614   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5615   MPI_Comm              comm;
5616   PetscSF               sf,osf;
5617   PetscBool             has;
5618 
5619   PetscFunctionBegin;
5620   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5621   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
5622   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5623    *  and then create a submatrix (that often is an overlapping matrix)
5624    * */
5625   if (reuse == MAT_INITIAL_MATRIX) {
5626     /* Use a hash table to figure out unique keys */
5627     PetscCall(PetscHMapICreate(&hamp));
5628     PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
5629     PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
5630     count = 0;
5631     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5632     for (i=0;i<a->B->cmap->n;i++) {
5633       key  = a->garray[i]/dof;
5634       PetscCall(PetscHMapIHas(hamp,key,&has));
5635       if (!has) {
5636         mapping[i] = count;
5637         PetscCall(PetscHMapISet(hamp,key,count++));
5638       } else {
5639         /* Current 'i' has the same value the previous step */
5640         mapping[i] = count-1;
5641       }
5642     }
5643     PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
5644     PetscCall(PetscHMapIGetSize(hamp,&htsize));
5645     PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5646     PetscCall(PetscCalloc1(htsize,&rowindices));
5647     off = 0;
5648     PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
5649     PetscCall(PetscHMapIDestroy(&hamp));
5650     PetscCall(PetscSortInt(htsize,rowindices));
5651     PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
5652     /* In case the matrix was already created but the user wants to recreate it */
5653     PetscCall(MatDestroy(P_oth));
5654     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
5655     PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
5656     PetscCall(ISDestroy(&map));
5657     PetscCall(ISDestroy(&rows));
5658   } else if (reuse == MAT_REUSE_MATRIX) {
5659     /* If the matrix was already created, we simply update values using the SF objects
5660      * that were attached to the matrix earlier.
5661      */
5662     const PetscScalar *pd_a,*po_a;
5663 
5664     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
5665     PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
5666     PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5667     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5668     /* Update values in place */
5669     PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
5670     PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
5671     PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5672     PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5673     PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
5674     PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
5675     PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
5676     PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
5677   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5678   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
5679   PetscFunctionReturn(0);
5680 }
5681 
5682 /*@C
5683   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5684 
5685   Collective on Mat
5686 
5687   Input Parameters:
5688 + A - the first matrix in mpiaij format
5689 . B - the second matrix in mpiaij format
5690 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5691 
5692   Output Parameters:
5693 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5694 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5695 - B_seq - the sequential matrix generated
5696 
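  Example usage (a minimal sketch with illustrative variable names; A and B are assumed to be assembled MATMPIAIJ matrices with compatible layouts):
.vb
  Mat B_seq;
  PetscCall(MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,NULL,NULL,&B_seq));
  ... use the sequential matrix B_seq ...
  PetscCall(MatDestroy(&B_seq));
.ve
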
5697   Level: developer
5698 
5699 @*/
5700 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5701 {
5702   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5703   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5704   IS             isrowb,iscolb;
5705   Mat            *bseq=NULL;
5706 
5707   PetscFunctionBegin;
5708   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5709     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5710   }
5711   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5712 
5713   if (scall == MAT_INITIAL_MATRIX) {
5714     start = A->cmap->rstart;
5715     cmap  = a->garray;
5716     nzA   = a->A->cmap->n;
5717     nzB   = a->B->cmap->n;
5718     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5719     ncols = 0;
5720     for (i=0; i<nzB; i++) {  /* row < local row index */
5721       if (cmap[i] < start) idx[ncols++] = cmap[i];
5722       else break;
5723     }
5724     imark = i;
5725     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5726     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5727     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5728     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5729   } else {
5730     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5731     isrowb  = *rowb; iscolb = *colb;
5732     PetscCall(PetscMalloc1(1,&bseq));
5733     bseq[0] = *B_seq;
5734   }
5735   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5736   *B_seq = bseq[0];
5737   PetscCall(PetscFree(bseq));
5738   if (!rowb) {
5739     PetscCall(ISDestroy(&isrowb));
5740   } else {
5741     *rowb = isrowb;
5742   }
5743   if (!colb) {
5744     PetscCall(ISDestroy(&iscolb));
5745   } else {
5746     *colb = iscolb;
5747   }
5748   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5749   PetscFunctionReturn(0);
5750 }
5751 
5752 /*
5753     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5754     of the OFF-DIAGONAL portion of local A
5755 
5756     Collective on Mat
5757 
5758    Input Parameters:
5759 +    A,B - the matrices in mpiaij format
5760 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5761 
5762    Output Parameters:
5763 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5764 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5765 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5766 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5767 
5768     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5769      for this matrix. This is not desirable.
5770 
5771     Level: developer
5772 
5773 */
5774 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5775 {
5776   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5777   Mat_SeqAIJ             *b_oth;
5778   VecScatter             ctx;
5779   MPI_Comm               comm;
5780   const PetscMPIInt      *rprocs,*sprocs;
5781   const PetscInt         *srow,*rstarts,*sstarts;
5782   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5783   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5784   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5785   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5786   PetscMPIInt            size,tag,rank,nreqs;
5787 
5788   PetscFunctionBegin;
5789   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5790   PetscCallMPI(MPI_Comm_size(comm,&size));
5791 
5792   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5793     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5794   }
5795   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5796   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5797 
5798   if (size == 1) {
5799     startsj_s = NULL;
5800     bufa_ptr  = NULL;
5801     *B_oth    = NULL;
5802     PetscFunctionReturn(0);
5803   }
5804 
5805   ctx = a->Mvctx;
5806   tag = ((PetscObject)ctx)->tag;
5807 
5808   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5809   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5810   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5811   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5812   PetscCall(PetscMalloc1(nreqs,&reqs));
5813   rwaits = reqs;
5814   swaits = reqs + nrecvs;
5815 
5816   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5817   if (scall == MAT_INITIAL_MATRIX) {
5818     /* i-array */
5819     /*---------*/
5820     /*  post receives */
5821     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5822     for (i=0; i<nrecvs; i++) {
5823       rowlen = rvalues + rstarts[i]*rbs;
5824       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5825       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5826     }
5827 
5828     /* pack the outgoing message */
5829     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5830 
5831     sstartsj[0] = 0;
5832     rstartsj[0] = 0;
5833     len         = 0; /* total length of j or a array to be sent */
5834     if (nsends) {
5835       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5836       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5837     }
5838     for (i=0; i<nsends; i++) {
5839       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5840       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5841       for (j=0; j<nrows; j++) {
5842         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5843         for (l=0; l<sbs; l++) {
5844           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5845 
5846           rowlen[j*sbs+l] = ncols;
5847 
5848           len += ncols;
5849           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5850         }
5851         k++;
5852       }
5853       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5854 
5855       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5856     }
5857     /* recvs and sends of i-array are completed */
5858     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5859     PetscCall(PetscFree(svalues));
5860 
5861     /* allocate buffers for sending j and a arrays */
5862     PetscCall(PetscMalloc1(len+1,&bufj));
5863     PetscCall(PetscMalloc1(len+1,&bufa));
5864 
5865     /* create i-array of B_oth */
5866     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5867 
5868     b_othi[0] = 0;
5869     len       = 0; /* total length of j or a array to be received */
5870     k         = 0;
5871     for (i=0; i<nrecvs; i++) {
5872       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5873       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5874       for (j=0; j<nrows; j++) {
5875         b_othi[k+1] = b_othi[k] + rowlen[j];
5876         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5877         k++;
5878       }
5879       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5880     }
5881     PetscCall(PetscFree(rvalues));
5882 
5883     /* allocate space for j and a arrays of B_oth */
5884     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5885     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5886 
5887     /* j-array */
5888     /*---------*/
5889     /*  post receives of j-array */
5890     for (i=0; i<nrecvs; i++) {
5891       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5892       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5893     }
5894 
5895     /* pack the outgoing message j-array */
5896     if (nsends) k = sstarts[0];
5897     for (i=0; i<nsends; i++) {
5898       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5899       bufJ  = bufj+sstartsj[i];
5900       for (j=0; j<nrows; j++) {
5901         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5902         for (ll=0; ll<sbs; ll++) {
5903           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5904           for (l=0; l<ncols; l++) {
5905             *bufJ++ = cols[l];
5906           }
5907           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5908         }
5909       }
5910       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5911     }
5912 
5913     /* recvs and sends of j-array are completed */
5914     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5915   } else if (scall == MAT_REUSE_MATRIX) {
5916     sstartsj = *startsj_s;
5917     rstartsj = *startsj_r;
5918     bufa     = *bufa_ptr;
5919     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5920     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5921   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5922 
5923   /* a-array */
5924   /*---------*/
5925   /*  post receives of a-array */
5926   for (i=0; i<nrecvs; i++) {
5927     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5928     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5929   }
5930 
5931   /* pack the outgoing message a-array */
5932   if (nsends) k = sstarts[0];
5933   for (i=0; i<nsends; i++) {
5934     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5935     bufA  = bufa+sstartsj[i];
5936     for (j=0; j<nrows; j++) {
5937       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5938       for (ll=0; ll<sbs; ll++) {
5939         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5940         for (l=0; l<ncols; l++) {
5941           *bufA++ = vals[l];
5942         }
5943         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5944       }
5945     }
5946     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5947   }
5948   /* recvs and sends of a-array are completed */
5949   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5950   PetscCall(PetscFree(reqs));
5951 
5952   if (scall == MAT_INITIAL_MATRIX) {
5953     /* put together the new matrix */
5954     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5955 
5956     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5957     /* Since these are PETSc arrays, change flags to free them as necessary. */
5958     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5959     b_oth->free_a  = PETSC_TRUE;
5960     b_oth->free_ij = PETSC_TRUE;
5961     b_oth->nonew   = 0;
5962 
5963     PetscCall(PetscFree(bufj));
5964     if (!startsj_s || !bufa_ptr) {
5965       PetscCall(PetscFree2(sstartsj,rstartsj));
5966       PetscCall(PetscFree(bufa_ptr));
5967     } else {
5968       *startsj_s = sstartsj;
5969       *startsj_r = rstartsj;
5970       *bufa_ptr  = bufa;
5971     }
5972   } else if (scall == MAT_REUSE_MATRIX) {
5973     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5974   }
5975 
5976   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5977   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5978   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5979   PetscFunctionReturn(0);
5980 }
5981 
5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5985 #if defined(PETSC_HAVE_MKL_SPARSE)
5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5987 #endif
5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5990 #if defined(PETSC_HAVE_ELEMENTAL)
5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5992 #endif
5993 #if defined(PETSC_HAVE_SCALAPACK)
5994 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5995 #endif
5996 #if defined(PETSC_HAVE_HYPRE)
5997 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5998 #endif
5999 #if defined(PETSC_HAVE_CUDA)
6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
6001 #endif
6002 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6003 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
6004 #endif
6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
6006 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
6007 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6008 
6009 /*
6010     Computes (B'*A')' since computing A*B directly is untenable
6011 
6012                n                       p                          p
6013         [             ]       [             ]         [                 ]
6014       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6015         [             ]       [             ]         [                 ]
6016 
6017 */
6018 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6019 {
6020   Mat            At,Bt,Ct;
6021 
6022   PetscFunctionBegin;
6023   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
6024   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
6025   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
6026   PetscCall(MatDestroy(&At));
6027   PetscCall(MatDestroy(&Bt));
6028   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
6029   PetscCall(MatDestroy(&Ct));
6030   PetscFunctionReturn(0);
6031 }
6032 
6033 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6034 {
6035   PetscBool      cisdense;
6036 
6037   PetscFunctionBegin;
6038   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6039   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
6040   PetscCall(MatSetBlockSizesFromMats(C,A,B));
6041   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
6042   if (!cisdense) {
6043     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6044   }
6045   PetscCall(MatSetUp(C));
6046 
6047   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6048   PetscFunctionReturn(0);
6049 }
6050 
6051 /* ----------------------------------------------------------------*/
6052 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6053 {
6054   Mat_Product *product = C->product;
6055   Mat         A = product->A,B=product->B;
6056 
6057   PetscFunctionBegin;
6058   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6059     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6060 
6061   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6062   C->ops->productsymbolic = MatProductSymbolic_AB;
6063   PetscFunctionReturn(0);
6064 }
6065 
6066 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6067 {
6068   Mat_Product    *product = C->product;
6069 
6070   PetscFunctionBegin;
6071   if (product->type == MATPRODUCT_AB) {
6072     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6073   }
6074   PetscFunctionReturn(0);
6075 }
6076 
6077 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6078 
6079   Input Parameters:
6080 
6081     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6082     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6083 
6084     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6085 
6086     For Set1, j1[] contains column indices of the nonzeros.
6087     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6088     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6089     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6090 
6091     Similar for Set2.
6092 
6093     This routine merges the two sets of nonzeros row by row and removes repeats.
6094 
6095   Output Parameters: (memory is allocated by the caller)
6096 
6097     i[],j[]: the CSR of the merged matrix, which has m rows.
6098     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix.
6099     imap2[]: similar to imap1[], but for Set2.
6100     Note we order nonzeros row-by-row and from left to right.
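
    Example (illustrative): on a matrix with one local row, with
      Set1: j1 = [1,1,3], rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]  (unique columns 1 and 3)
      Set2: j2 = [2,3],   rowBegin2 = [0], rowEnd2 = [2], jmap2 = [0,1,2]  (unique columns 2 and 3)
    the merged CSR is i = [0,3], j = [1,2,3], with imap1 = [0,2] and imap2 = [1,2].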
6101 */
6102 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6103   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6104   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6105 {
6106   PetscInt       r,m; /* Row index of mat */
6107   PetscCount     t,t1,t2,b1,e1,b2,e2;
6108 
6109   PetscFunctionBegin;
6110   PetscCall(MatGetLocalSize(mat,&m,NULL));
6111   t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
6112   i[0] = 0;
6113   for (r=0; r<m; r++) { /* Do row by row merging */
6114     b1   = rowBegin1[r];
6115     e1   = rowEnd1[r];
6116     b2   = rowBegin2[r];
6117     e2   = rowEnd2[r];
6118     while (b1 < e1 && b2 < e2) {
6119       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6120         j[t]      = j1[b1];
6121         imap1[t1] = t;
6122         imap2[t2] = t;
6123         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6124         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6125         t1++; t2++; t++;
6126       } else if (j1[b1] < j2[b2]) {
6127         j[t]      = j1[b1];
6128         imap1[t1] = t;
6129         b1       += jmap1[t1+1] - jmap1[t1];
6130         t1++; t++;
6131       } else {
6132         j[t]      = j2[b2];
6133         imap2[t2] = t;
6134         b2       += jmap2[t2+1] - jmap2[t2];
6135         t2++; t++;
6136       }
6137     }
6138     /* Merge the remaining in either j1[] or j2[] */
6139     while (b1 < e1) {
6140       j[t]      = j1[b1];
6141       imap1[t1] = t;
6142       b1       += jmap1[t1+1] - jmap1[t1];
6143       t1++; t++;
6144     }
6145     while (b2 < e2) {
6146       j[t]      = j2[b2];
6147       imap2[t2] = t;
6148       b2       += jmap2[t2+1] - jmap2[t2];
6149       t2++; t++;
6150     }
6151     i[r+1] = t;
6152   }
6153   PetscFunctionReturn(0);
6154 }
6155 
6156 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6157 
6158   Input Parameters:
6159     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6160     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6161       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6162 
6163       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6164       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6165 
6166   Output Parameters:
6167     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6168     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6169       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6170       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6171 
6172     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6173       Atot: number of entries belonging to the diagonal block.
6174       Annz: number of unique nonzeros belonging to the diagonal block.
6175       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6176         repeats (i.e., same 'i,j' pair).
6177       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6178         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6179 
6183     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6184 
6185     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
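
    Example (illustrative): with one local row, column ownership range [0,2), and the entries of that row having
    column indices j = [3,0,0,3,1], after the call j is sorted (diagonal block first) to [0,0,1,3,3] and
      rowBegin = [0], rowMid = [3], rowEnd = [5],
      Atot = 3, Annz = 2, Ajmap = [0,2,3]   (columns 0 and 1, with 2 and 1 repeats),
      Btot = 2, Bnnz = 1, Bjmap = [0,2]     (column 3, with 2 repeats),
    while Aperm[]/Bperm[] hold the perm[] values of the diagonal/off-diagonal entries respectively.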
6186 */
6187 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6188   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6189   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6190   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6191 {
6192   PetscInt          cstart,cend,rstart,rend,row,col;
6193   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6194   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6195   PetscCount        k,m,p,q,r,s,mid;
6196   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6197 
6198   PetscFunctionBegin;
6199   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6200   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6201   m    = rend - rstart;
6202 
6203   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6204 
6205   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6206      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6207   */
6208   while (k<n) {
6209     row = i[k];
6210     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6211     for (s=k; s<n; s++) if (i[s] != row) break;
6212     for (p=k; p<s; p++) {
6213       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6214       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6215     }
6216     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6217     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6218     rowBegin[row-rstart] = k;
6219     rowMid[row-rstart]   = mid;
6220     rowEnd[row-rstart]   = s;
6221 
6222     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6223     Atot += mid - k;
6224     Btot += s - mid;
6225 
6226     /* Count unique nonzeros of this diag/offdiag row */
6227     for (p=k; p<mid;) {
6228       col = j[p];
6229       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6230       Annz++;
6231     }
6232 
6233     for (p=mid; p<s;) {
6234       col = j[p];
6235       do {p++;} while (p<s && j[p] == col);
6236       Bnnz++;
6237     }
6238     k = s;
6239   }
6240 
6241   /* Allocation according to Atot, Btot, Annz, Bnnz */
6242   PetscCall(PetscMalloc1(Atot,&Aperm));
6243   PetscCall(PetscMalloc1(Btot,&Bperm));
6244   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6245   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6246 
6247   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6248   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6249   for (r=0; r<m; r++) {
6250     k     = rowBegin[r];
6251     mid   = rowMid[r];
6252     s     = rowEnd[r];
6253     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6254     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6255     Atot += mid - k;
6256     Btot += s - mid;
6257 
6258     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6259     for (p=k; p<mid;) {
6260       col = j[p];
6261       q   = p;
6262       do {p++;} while (p<mid && j[p] == col);
6263       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6264       Annz++;
6265     }
6266 
6267     for (p=mid; p<s;) {
6268       col = j[p];
6269       q   = p;
6270       do {p++;} while (p<s && j[p] == col);
6271       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6272       Bnnz++;
6273     }
6274   }
6275   /* Output */
6276   *Aperm_ = Aperm;
6277   *Annz_  = Annz;
6278   *Atot_  = Atot;
6279   *Ajmap_ = Ajmap;
6280   *Bperm_ = Bperm;
6281   *Bnnz_  = Bnnz;
6282   *Btot_  = Btot;
6283   *Bjmap_ = Bjmap;
6284   PetscFunctionReturn(0);
6285 }
6286 
6287 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6288 
6289   Input Parameters:
6290     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6291     nnz:  number of unique nonzeros in the merged matrix
6292     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6293     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6294 
6295   Output Parameter: (memory is allocated by the caller)
6296     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6297 
6298   Example:
6299     nnz1 = 4
6300     nnz  = 6
6301     imap = [1,3,4,5]
6302     jmap = [0,3,5,6,7]
6303    then,
6304     jmap_new = [0,0,3,3,5,6,7]
6305 */
6306 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6307 {
6308   PetscCount k,p;
6309 
6310   PetscFunctionBegin;
6311   jmap_new[0] = 0;
6312   p = nnz; /* p loops over jmap_new[] backwards */
6313   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6314     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6315   }
6316   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6317   PetscFunctionReturn(0);
6318 }
6319 
6320 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6321 {
6322   MPI_Comm                  comm;
6323   PetscMPIInt               rank,size;
6324   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6325   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6326   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6327 
6328   PetscFunctionBegin;
6329   PetscCall(PetscFree(mpiaij->garray));
6330   PetscCall(VecDestroy(&mpiaij->lvec));
6331 #if defined(PETSC_USE_CTABLE)
6332   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6333 #else
6334   PetscCall(PetscFree(mpiaij->colmap));
6335 #endif
6336   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6337   mat->assembled = PETSC_FALSE;
6338   mat->was_assembled = PETSC_FALSE;
6339   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6340 
6341   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6342   PetscCallMPI(MPI_Comm_size(comm,&size));
6343   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6344   PetscCall(PetscLayoutSetUp(mat->rmap));
6345   PetscCall(PetscLayoutSetUp(mat->cmap));
6346   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6347   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6348   PetscCall(MatGetLocalSize(mat,&m,&n));
6349   PetscCall(MatGetSize(mat,&M,&N));
6350 
6351   /* ---------------------------------------------------------------------------*/
6352   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6353   /* entries come first, then local rows, then remote rows.                     */
6354   /* ---------------------------------------------------------------------------*/
6355   PetscCount n1 = coo_n,*perm1;
6356   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6357   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6358   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6359   PetscCall(PetscArraycpy(j1,coo_j,n1));
6360   for (k=0; k<n1; k++) perm1[k] = k;
6361 
6362   /* Manipulate indices so that entries with negative row or col indices will have smallest
6363      row indices, local entries will have greater but negative row indices, and remote entries
6364      will have positive row indices.
6365   */
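  /* For example (illustrative), with rstart=10 and rend=20: an entry with a negative row or column index is mapped
     to PETSC_MIN_INT, a local row 12 becomes 12-PETSC_MAX_INT (negative but larger than PETSC_MIN_INT), and a remote
     row 25 keeps its value, so the sort below places ignored entries first, then local rows, then remote rows. */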
6366   for (k=0; k<n1; k++) {
6367     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6368     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6369     else {
6370       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but there are entries for remote rows");
6371       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6372     }
6373   }
6374 
6375   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6376   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6377   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6378   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6379   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6380 
6381   /* ---------------------------------------------------------------------------*/
6382   /*           Split local rows into diag/offdiag portions                      */
6383   /* ---------------------------------------------------------------------------*/
6384   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6385   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6386   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6387 
6388   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6389   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6390   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6391 
6392   /* ---------------------------------------------------------------------------*/
6393   /*           Send remote rows to their owner                                  */
6394   /* ---------------------------------------------------------------------------*/
6395   /* Find which rows should be sent to which remote ranks*/
6396   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6397   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6398   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6399   const PetscInt *ranges;
6400   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6401 
6402   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6403   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6404   for (k=rem; k<n1;) {
6405     PetscMPIInt  owner;
6406     PetscInt     firstRow,lastRow;
6407 
6408     /* Locate a row range */
6409     firstRow = i1[k]; /* first row of this owner */
6410     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6411     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6412 
6413     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6414     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6415 
6416     /* All entries in [k,p) belong to this remote owner */
6417     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6418       PetscMPIInt *sendto2;
6419       PetscInt    *nentries2;
6420       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6421 
6422       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6423       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6424       PetscCall(PetscArraycpy(nentries2,nentries,maxNsend)); /* copy the old counts into the new array */
6425       PetscCall(PetscFree2(sendto,nentries));
6426       sendto      = sendto2;
6427       nentries    = nentries2;
6428       maxNsend    = maxNsend2;
6429     }
6430     sendto[nsend]   = owner;
6432     PetscCall(PetscCountCast(p-k,&nentries[nsend])); /* checked narrowing of the entry count to PetscInt */
6433     nsend++;
6434     k = p;
6435   }
6436 
6437   /* Build 1st SF to know offsets on remote to send data */
6438   PetscSF     sf1;
6439   PetscInt    nroots = 1,nroots2 = 0;
6440   PetscInt    nleaves = nsend,nleaves2 = 0;
6441   PetscInt    *offsets;
6442   PetscSFNode *iremote;
6443 
6444   PetscCall(PetscSFCreate(comm,&sf1));
6445   PetscCall(PetscMalloc1(nsend,&iremote));
6446   PetscCall(PetscMalloc1(nsend,&offsets));
6447   for (k=0; k<nsend; k++) {
6448     iremote[k].rank  = sendto[k];
6449     iremote[k].index = 0;
6450     nleaves2        += nentries[k];
6451     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6452   }
6453   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6454   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6455   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below catches it */
6456   PetscCall(PetscSFDestroy(&sf1));
6457   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6458 
6459   /* Build 2nd SF to send remote COOs to their owner */
6460   PetscSF sf2;
6461   nroots  = nroots2;
6462   nleaves = nleaves2;
6463   PetscCall(PetscSFCreate(comm,&sf2));
6464   PetscCall(PetscSFSetFromOptions(sf2));
6465   PetscCall(PetscMalloc1(nleaves,&iremote));
6466   p       = 0;
6467   for (k=0; k<nsend; k++) {
6468     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6469     for (q=0; q<nentries[k]; q++,p++) {
6470       iremote[p].rank  = sendto[k];
6471       iremote[p].index = offsets[k] + q;
6472     }
6473   }
6474   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6475 
6476   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6477   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6478 
6479   /* Send the remote COOs to their owner */
6480   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6481   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6482   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6483   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6484   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6485   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6486   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6487 
6488   PetscCall(PetscFree(offsets));
6489   PetscCall(PetscFree2(sendto,nentries));
6490 
6491   /* ---------------------------------------------------------------*/
6492   /* Sort received COOs by row along with the permutation array     */
6493   /* ---------------------------------------------------------------*/
6494   for (k=0; k<n2; k++) perm2[k] = k;
6495   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6496 
6497   /* ---------------------------------------------------------------*/
6498   /* Split received COOs into diag/offdiag portions                 */
6499   /* ---------------------------------------------------------------*/
6500   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6501   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6502   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6503 
6504   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6505   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6506 
6507   /* --------------------------------------------------------------------------*/
6508   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6509   /* --------------------------------------------------------------------------*/
6510   PetscInt   *Ai,*Bi;
6511   PetscInt   *Aj,*Bj;
6512 
6513   PetscCall(PetscMalloc1(m+1,&Ai));
6514   PetscCall(PetscMalloc1(m+1,&Bi));
6515   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6516   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6517 
6518   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6519   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6520   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6521   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6522   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6523 
6524   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6525   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6526 
6527   /* --------------------------------------------------------------------------*/
6528   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since    */
6529   /* most nonzeros of A/B are expected to have local contributing entries           */
6530   /* --------------------------------------------------------------------------*/
6531   PetscInt Annz = Ai[m];
6532   PetscInt Bnnz = Bi[m];
6533   PetscCount *Ajmap1_new,*Bjmap1_new;
6534 
6535   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6536   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6537 
6538   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6539   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6540 
6541   PetscCall(PetscFree(Aimap1));
6542   PetscCall(PetscFree(Ajmap1));
6543   PetscCall(PetscFree(Bimap1));
6544   PetscCall(PetscFree(Bjmap1));
6545   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6546   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6547   PetscCall(PetscFree3(i1,j1,perm1));
6548   PetscCall(PetscFree3(i2,j2,perm2));
6549 
6550   Ajmap1 = Ajmap1_new;
6551   Bjmap1 = Bjmap1_new;
6552 
6553   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6554   if (Annz < Annz1 + Annz2) {
6555     PetscInt *Aj_new;
6556     PetscCall(PetscMalloc1(Annz,&Aj_new));
6557     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6558     PetscCall(PetscFree(Aj));
6559     Aj   = Aj_new;
6560   }
6561 
6562   if (Bnnz < Bnnz1 + Bnnz2) {
6563     PetscInt *Bj_new;
6564     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6565     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6566     PetscCall(PetscFree(Bj));
6567     Bj   = Bj_new;
6568   }
6569 
6570   /* --------------------------------------------------------------------------------*/
6571   /* Create new submatrices for on-process and off-process coupling                  */
6572   /* --------------------------------------------------------------------------------*/
6573   PetscScalar   *Aa,*Ba;
6574   MatType       rtype;
6575   Mat_SeqAIJ    *a,*b;
6576   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6577   PetscCall(PetscCalloc1(Bnnz,&Ba));
6578   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6579   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6580   PetscCall(MatDestroy(&mpiaij->A));
6581   PetscCall(MatDestroy(&mpiaij->B));
6582   PetscCall(MatGetRootType_Private(mat,&rtype));
6583   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6584   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6585   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6586 
6587   a = (Mat_SeqAIJ*)mpiaij->A->data;
6588   b = (Mat_SeqAIJ*)mpiaij->B->data;
6589   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6590   a->free_a       = b->free_a       = PETSC_TRUE;
6591   a->free_ij      = b->free_ij      = PETSC_TRUE;
6592 
6593   /* conversion must happen AFTER multiply setup */
6594   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6595   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6596   PetscCall(VecDestroy(&mpiaij->lvec));
6597   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6598   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6599 
6600   mpiaij->coo_n   = coo_n;
6601   mpiaij->coo_sf  = sf2;
6602   mpiaij->sendlen = nleaves;
6603   mpiaij->recvlen = nroots;
6604 
6605   mpiaij->Annz    = Annz;
6606   mpiaij->Bnnz    = Bnnz;
6607 
6608   mpiaij->Annz2   = Annz2;
6609   mpiaij->Bnnz2   = Bnnz2;
6610 
6611   mpiaij->Atot1   = Atot1;
6612   mpiaij->Atot2   = Atot2;
6613   mpiaij->Btot1   = Btot1;
6614   mpiaij->Btot2   = Btot2;
6615 
6616   mpiaij->Ajmap1  = Ajmap1;
6617   mpiaij->Aperm1  = Aperm1;
6618 
6619   mpiaij->Bjmap1  = Bjmap1;
6620   mpiaij->Bperm1  = Bperm1;
6621 
6622   mpiaij->Aimap2  = Aimap2;
6623   mpiaij->Ajmap2  = Ajmap2;
6624   mpiaij->Aperm2  = Aperm2;
6625 
6626   mpiaij->Bimap2  = Bimap2;
6627   mpiaij->Bjmap2  = Bjmap2;
6628   mpiaij->Bperm2  = Bperm2;
6629 
6630   mpiaij->Cperm1  = Cperm1;
6631 
6632   /* Allocate the send/recv buffers here in preallocation. If not used, they have zero cost on host */
6633   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6634   PetscFunctionReturn(0);
6635 }
6636 
6637 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6638 {
6639   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6640   Mat                  A = mpiaij->A,B = mpiaij->B;
6641   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6642   PetscScalar          *Aa,*Ba;
6643   PetscScalar          *sendbuf = mpiaij->sendbuf;
6644   PetscScalar          *recvbuf = mpiaij->recvbuf;
6645   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6646   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6647   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6648   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6649 
6650   PetscFunctionBegin;
6651   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6652   PetscCall(MatSeqAIJGetArray(B,&Ba));
6653 
6654   /* Pack entries to be sent to remote */
6655   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6656 
6657   /* Send remote entries to their owner and overlap the communication with local computation */
6658   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6659   /* Add local entries to A and B */
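  /* A hypothetical illustration of the jmap/perm layout: if nonzero i of A was produced by input COO
     entries 7 and 12 (indices into v[]), then Ajmap1[i+1]-Ajmap1[i] = 2 and Aperm1[] holds {7,12}
     starting at position Ajmap1[i], so the loop below accumulates v[7]+v[12] into Aa[i]. */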
6660   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zeroed or added with a value (i.e., initialized) */
6661     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6662     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6663     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6664   }
6665   for (PetscCount i=0; i<Bnnz; i++) {
6666     PetscScalar sum = 0.0;
6667     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6668     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6669   }
6670   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6671 
6672   /* Add received remote entries to A and B */
6673   for (PetscCount i=0; i<Annz2; i++) {
6674     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6675   }
6676   for (PetscCount i=0; i<Bnnz2; i++) {
6677     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6678   }
6679   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6680   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6681   PetscFunctionReturn(0);
6682 }
6683 
6684 /* ----------------------------------------------------------------*/
6685 
6686 /*MC
6687    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6688 
6689    Options Database Keys:
6690 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6691 
6692    Level: beginner
6693 
6694    Notes:
6695     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6696     in this case the entries associated with the rows and columns one passes in are set to zero
6697     in the matrix.
6698
6699     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6700     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
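
    A minimal creation sketch (illustrative only; the global sizes and the preallocation counts 5 and 2
    are placeholder guesses, not recommendations):
.vb
    Mat      A;
    PetscInt M = 100, N = 100; /* illustrative global sizes */
    MatCreate(PETSC_COMM_WORLD,&A);
    MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve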
6701 
6702 .seealso: `MatCreateAIJ()`
6703 M*/
6704 
6705 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6706 {
6707   Mat_MPIAIJ     *b;
6708   PetscMPIInt    size;
6709 
6710   PetscFunctionBegin;
6711   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6712 
6713   PetscCall(PetscNewLog(B,&b));
6714   B->data       = (void*)b;
6715   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6716   B->assembled  = PETSC_FALSE;
6717   B->insertmode = NOT_SET_VALUES;
6718   b->size       = size;
6719 
6720   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6721 
6722   /* build cache for off array entries formed */
6723   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6724 
6725   b->donotstash  = PETSC_FALSE;
6726   b->colmap      = NULL;
6727   b->garray      = NULL;
6728   b->roworiented = PETSC_TRUE;
6729 
6730   /* stuff used for matrix vector multiply */
6731   b->lvec  = NULL;
6732   b->Mvctx = NULL;
6733 
6734   /* stuff for MatGetRow() */
6735   b->rowindices   = NULL;
6736   b->rowvalues    = NULL;
6737   b->getrowactive = PETSC_FALSE;
6738 
6739   /* flexible pointer used in CUSPARSE classes */
6740   b->spptr = NULL;
6741 
6742   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6743   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6744   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6745   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6746   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6747   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6748   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6749   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6750   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6751   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6752 #if defined(PETSC_HAVE_CUDA)
6753   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6754 #endif
6755 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6756   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6757 #endif
6758 #if defined(PETSC_HAVE_MKL_SPARSE)
6759   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6760 #endif
6761   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6762   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6763   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6764   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6765 #if defined(PETSC_HAVE_ELEMENTAL)
6766   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6767 #endif
6768 #if defined(PETSC_HAVE_SCALAPACK)
6769   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6770 #endif
6771   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6772   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6773 #if defined(PETSC_HAVE_HYPRE)
6774   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6775   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6776 #endif
6777   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6778   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6779   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6780   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6781   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6782   PetscFunctionReturn(0);
6783 }
6784 
6785 /*@C
6786      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6787          and "off-diagonal" part of the matrix in CSR format.
6788 
6789    Collective
6790 
6791    Input Parameters:
6792 +  comm - MPI communicator
6793 .  m - number of local rows (Cannot be PETSC_DECIDE)
6794 .  n - This value should be the same as the local size used in creating the
6795        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6796        it calculated if N is given). For square matrices n is almost always m.
6797 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6798 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6799 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6800 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6801 .   a - matrix values
6802 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6803 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6804 -   oa - matrix values
6805 
6806    Output Parameter:
6807 .   mat - the matrix
6808 
6809    Level: advanced
6810 
6811    Notes:
6812        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6813        must free the arrays once the matrix has been destroyed and not before.
6814 
6815        The i and j indices are 0 based
6816 
6817        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6818 
6819        This sets local rows and cannot be used to set off-processor values.
6820 
6821        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6822        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6823        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6824        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6825        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6826        communication if it is known that only local entries will be set.
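
       A per-rank layout sketch (illustrative values only, not taken from any test): for a 4x4 matrix on
       2 ranks, the rank owning rows 0-1 and columns 0-1, with one coupling entry to global column 2
       owned by the other rank, could pass
.vb
       PetscInt    i[]  = {0,1,2};   /* diagonal block CSR row pointers: one entry per row     */
       PetscInt    j[]  = {0,1};     /* local (block-relative) column indices                  */
       PetscScalar a[]  = {1.0,2.0};
       PetscInt    oi[] = {0,1,1};   /* off-diagonal block CSR row pointers: one entry in row 0 */
       PetscInt    oj[] = {2};       /* global column index owned by the other rank            */
       PetscScalar oa[] = {3.0};
       Mat         A;
       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
.ve
       keeping in mind that, as noted above, the arrays must remain valid until the matrix is destroyed.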
6827 
6828 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6829           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6830 @*/
6831 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6832 {
6833   Mat_MPIAIJ     *maij;
6834 
6835   PetscFunctionBegin;
6836   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6837   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6838   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6839   PetscCall(MatCreate(comm,mat));
6840   PetscCall(MatSetSizes(*mat,m,n,M,N));
6841   PetscCall(MatSetType(*mat,MATMPIAIJ));
6842   maij = (Mat_MPIAIJ*) (*mat)->data;
6843 
6844   (*mat)->preallocated = PETSC_TRUE;
6845 
6846   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6847   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6848 
6849   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6850   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6851 
6852   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6853   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6854   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6855   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6856   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6857   PetscFunctionReturn(0);
6858 }
6859 
6860 typedef struct {
6861   Mat       *mp;    /* intermediate products */
6862   PetscBool *mptmp; /* is the intermediate product temporary ? */
6863   PetscInt  cp;     /* number of intermediate products */
6864 
6865   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6866   PetscInt    *startsj_s,*startsj_r;
6867   PetscScalar *bufa;
6868   Mat         P_oth;
6869 
6870   /* may take advantage of merging product->B */
6871   Mat Bloc; /* B-local by merging diag and off-diag */
6872 
6873   /* cusparse does not have support to split between symbolic and numeric phases.
6874      When api_user is true, we don't need to update the numerical values
6875      of the temporary storage */
6876   PetscBool reusesym;
6877 
6878   /* support for COO values insertion */
6879   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars; also used as the MPI recv/send buffers, respectively */
6880   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6881   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6882   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6883   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6884   PetscMemType mtype;
6885 
6886   /* customization */
6887   PetscBool abmerge;
6888   PetscBool P_oth_bind;
6889 } MatMatMPIAIJBACKEND;
6890 
6891 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6892 {
6893   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6894   PetscInt            i;
6895 
6896   PetscFunctionBegin;
6897   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6898   PetscCall(PetscFree(mmdata->bufa));
6899   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6900   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6901   PetscCall(MatDestroy(&mmdata->P_oth));
6902   PetscCall(MatDestroy(&mmdata->Bloc));
6903   PetscCall(PetscSFDestroy(&mmdata->sf));
6904   for (i = 0; i < mmdata->cp; i++) {
6905     PetscCall(MatDestroy(&mmdata->mp[i]));
6906   }
6907   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6908   PetscCall(PetscFree(mmdata->own[0]));
6909   PetscCall(PetscFree(mmdata->own));
6910   PetscCall(PetscFree(mmdata->off[0]));
6911   PetscCall(PetscFree(mmdata->off));
6912   PetscCall(PetscFree(mmdata));
6913   PetscFunctionReturn(0);
6914 }
6915 
6916 /* Copy selected n entries with indices in idx[] of A to v[].
6917    If idx is NULL, copy the whole data array of A to v[]
6918  */
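/* e.g. (illustrative): with n = 2 and idx = {2,5}, this sets v[0] and v[1] to the 3rd and 6th stored
   nonzero values of A, in the order they appear in A's value array */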
6919 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6920 {
6921   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6922 
6923   PetscFunctionBegin;
6924   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6925   if (f) {
6926     PetscCall((*f)(A,n,idx,v));
6927   } else {
6928     const PetscScalar *vv;
6929 
6930     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6931     if (n && idx) {
6932       PetscScalar    *w = v;
6933       const PetscInt *oi = idx;
6934       PetscInt       j;
6935 
6936       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6937     } else {
6938       PetscCall(PetscArraycpy(v,vv,n));
6939     }
6940     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6941   }
6942   PetscFunctionReturn(0);
6943 }
6944 
6945 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6946 {
6947   MatMatMPIAIJBACKEND *mmdata;
6948   PetscInt            i,n_d,n_o;
6949 
6950   PetscFunctionBegin;
6951   MatCheckProduct(C,1);
6952   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6953   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6954   if (!mmdata->reusesym) { /* update temporary matrices */
6955     if (mmdata->P_oth) {
6956       PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6957     }
6958     if (mmdata->Bloc) {
6959       PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6960     }
6961   }
6962   mmdata->reusesym = PETSC_FALSE;
6963 
6964   for (i = 0; i < mmdata->cp; i++) {
6965     PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6966     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6967   }
6968   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6969     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6970 
6971     if (mmdata->mptmp[i]) continue;
6972     if (noff) {
6973       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6974 
6975       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6976       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6977       n_o += noff;
6978       n_d += nown;
6979     } else {
6980       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6981 
6982       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6983       n_d += mm->nz;
6984     }
6985   }
6986   if (mmdata->hasoffproc) { /* offprocess insertion */
6987     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6988     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6989   }
6990   PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6991   PetscFunctionReturn(0);
6992 }
6993 
6994 /* Support for Pt * A, A * P, or Pt * A * P */
6995 #define MAX_NUMBER_INTERMEDIATE 4
6996 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6997 {
6998   Mat_Product            *product = C->product;
6999   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7000   Mat_MPIAIJ             *a,*p;
7001   MatMatMPIAIJBACKEND    *mmdata;
7002   ISLocalToGlobalMapping P_oth_l2g = NULL;
7003   IS                     glob = NULL;
7004   const char             *prefix;
7005   char                   pprefix[256];
7006   const PetscInt         *globidx,*P_oth_idx;
7007   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
7008   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
7009   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7010                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
7011                                                                                         /* a base offset; type-2: sparse with a local to global map table */
7012   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
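  /* Illustrative mapping examples: a type-1 column map sends local column 3 to global column cs+3,
     where cs is the owned-column start of C; a type-2 map sends it to cmapa[cp][3] */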
7013 
7014   MatProductType         ptype;
7015   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
7016   PetscMPIInt            size;
7017 
7018   PetscFunctionBegin;
7019   MatCheckProduct(C,1);
7020   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
7021   ptype = product->type;
7022   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
7023     ptype = MATPRODUCT_AB;
7024     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7025   }
7026   switch (ptype) {
7027   case MATPRODUCT_AB:
7028     A = product->A;
7029     P = product->B;
7030     m = A->rmap->n;
7031     n = P->cmap->n;
7032     M = A->rmap->N;
7033     N = P->cmap->N;
7034     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7035     break;
7036   case MATPRODUCT_AtB:
7037     P = product->A;
7038     A = product->B;
7039     m = P->cmap->n;
7040     n = A->cmap->n;
7041     M = P->cmap->N;
7042     N = A->cmap->N;
7043     hasoffproc = PETSC_TRUE;
7044     break;
7045   case MATPRODUCT_PtAP:
7046     A = product->A;
7047     P = product->B;
7048     m = P->cmap->n;
7049     n = P->cmap->n;
7050     M = P->cmap->N;
7051     N = P->cmap->N;
7052     hasoffproc = PETSC_TRUE;
7053     break;
7054   default:
7055     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7056   }
7057   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
7058   if (size == 1) hasoffproc = PETSC_FALSE;
7059 
7060   /* defaults */
7061   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
7062     mp[i]    = NULL;
7063     mptmp[i] = PETSC_FALSE;
7064     rmapt[i] = -1;
7065     cmapt[i] = -1;
7066     rmapa[i] = NULL;
7067     cmapa[i] = NULL;
7068   }
7069 
7070   /* customization */
7071   PetscCall(PetscNew(&mmdata));
7072   mmdata->reusesym = product->api_user;
7073   if (ptype == MATPRODUCT_AB) {
7074     if (product->api_user) {
7075       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
7076       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7077       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7078       PetscOptionsEnd();
7079     } else {
7080       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
7081       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7082       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7083       PetscOptionsEnd();
7084     }
7085   } else if (ptype == MATPRODUCT_PtAP) {
7086     if (product->api_user) {
7087       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7088       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7089       PetscOptionsEnd();
7090     } else {
7091       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7092       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7093       PetscOptionsEnd();
7094     }
7095   }
7096   a = (Mat_MPIAIJ*)A->data;
7097   p = (Mat_MPIAIJ*)P->data;
7098   PetscCall(MatSetSizes(C,m,n,M,N));
7099   PetscCall(PetscLayoutSetUp(C->rmap));
7100   PetscCall(PetscLayoutSetUp(C->cmap));
7101   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7102   PetscCall(MatGetOptionsPrefix(C,&prefix));
7103 
7104   cp   = 0;
7105   switch (ptype) {
7106   case MATPRODUCT_AB: /* A * P */
7107     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7108 
7109     /* A_diag * P_local (merged or not) */
7110     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7111       /* P is product->B */
7112       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7113       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7114       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7115       PetscCall(MatProductSetFill(mp[cp],product->fill));
7116       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7117       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7118       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7119       mp[cp]->product->api_user = product->api_user;
7120       PetscCall(MatProductSetFromOptions(mp[cp]));
7121       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7122       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7123       PetscCall(ISGetIndices(glob,&globidx));
7124       rmapt[cp] = 1;
7125       cmapt[cp] = 2;
7126       cmapa[cp] = globidx;
7127       mptmp[cp] = PETSC_FALSE;
7128       cp++;
7129     } else { /* A_diag * P_diag and A_diag * P_off */
7130       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7131       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7132       PetscCall(MatProductSetFill(mp[cp],product->fill));
7133       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7134       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7135       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7136       mp[cp]->product->api_user = product->api_user;
7137       PetscCall(MatProductSetFromOptions(mp[cp]));
7138       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7139       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7140       rmapt[cp] = 1;
7141       cmapt[cp] = 1;
7142       mptmp[cp] = PETSC_FALSE;
7143       cp++;
7144       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7145       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7146       PetscCall(MatProductSetFill(mp[cp],product->fill));
7147       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7148       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7149       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7150       mp[cp]->product->api_user = product->api_user;
7151       PetscCall(MatProductSetFromOptions(mp[cp]));
7152       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7153       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7154       rmapt[cp] = 1;
7155       cmapt[cp] = 2;
7156       cmapa[cp] = p->garray;
7157       mptmp[cp] = PETSC_FALSE;
7158       cp++;
7159     }
7160 
7161     /* A_off * P_other */
7162     if (mmdata->P_oth) {
7163       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7164       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7165       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7166       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7167       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7168       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7169       PetscCall(MatProductSetFill(mp[cp],product->fill));
7170       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7171       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7172       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7173       mp[cp]->product->api_user = product->api_user;
7174       PetscCall(MatProductSetFromOptions(mp[cp]));
7175       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7176       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7177       rmapt[cp] = 1;
7178       cmapt[cp] = 2;
7179       cmapa[cp] = P_oth_idx;
7180       mptmp[cp] = PETSC_FALSE;
7181       cp++;
7182     }
7183     break;
7184 
7185   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7186     /* A is product->B */
7187     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7188     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7189       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7190       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7191       PetscCall(MatProductSetFill(mp[cp],product->fill));
7192       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7193       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7194       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7195       mp[cp]->product->api_user = product->api_user;
7196       PetscCall(MatProductSetFromOptions(mp[cp]));
7197       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7198       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7199       PetscCall(ISGetIndices(glob,&globidx));
7200       rmapt[cp] = 2;
7201       rmapa[cp] = globidx;
7202       cmapt[cp] = 2;
7203       cmapa[cp] = globidx;
7204       mptmp[cp] = PETSC_FALSE;
7205       cp++;
7206     } else {
7207       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7208       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7209       PetscCall(MatProductSetFill(mp[cp],product->fill));
7210       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7211       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7212       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7213       mp[cp]->product->api_user = product->api_user;
7214       PetscCall(MatProductSetFromOptions(mp[cp]));
7215       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7216       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7217       PetscCall(ISGetIndices(glob,&globidx));
7218       rmapt[cp] = 1;
7219       cmapt[cp] = 2;
7220       cmapa[cp] = globidx;
7221       mptmp[cp] = PETSC_FALSE;
7222       cp++;
7223       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7224       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7225       PetscCall(MatProductSetFill(mp[cp],product->fill));
7226       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7227       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7228       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7229       mp[cp]->product->api_user = product->api_user;
7230       PetscCall(MatProductSetFromOptions(mp[cp]));
7231       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7232       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7233       rmapt[cp] = 2;
7234       rmapa[cp] = p->garray;
7235       cmapt[cp] = 2;
7236       cmapa[cp] = globidx;
7237       mptmp[cp] = PETSC_FALSE;
7238       cp++;
7239     }
7240     break;
7241   case MATPRODUCT_PtAP:
7242     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7243     /* P is product->B */
7244     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7245     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7246     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7247     PetscCall(MatProductSetFill(mp[cp],product->fill));
7248     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7249     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7250     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7251     mp[cp]->product->api_user = product->api_user;
7252     PetscCall(MatProductSetFromOptions(mp[cp]));
7253     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7254     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7255     PetscCall(ISGetIndices(glob,&globidx));
7256     rmapt[cp] = 2;
7257     rmapa[cp] = globidx;
7258     cmapt[cp] = 2;
7259     cmapa[cp] = globidx;
7260     mptmp[cp] = PETSC_FALSE;
7261     cp++;
7262     if (mmdata->P_oth) {
7263       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7264       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7265       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7266       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7267       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7268       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7269       PetscCall(MatProductSetFill(mp[cp],product->fill));
7270       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7271       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7272       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7273       mp[cp]->product->api_user = product->api_user;
7274       PetscCall(MatProductSetFromOptions(mp[cp]));
7275       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7276       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7277       mptmp[cp] = PETSC_TRUE;
7278       cp++;
7279       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7280       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7281       PetscCall(MatProductSetFill(mp[cp],product->fill));
7282       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7283       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7284       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7285       mp[cp]->product->api_user = product->api_user;
7286       PetscCall(MatProductSetFromOptions(mp[cp]));
7287       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7288       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7289       rmapt[cp] = 2;
7290       rmapa[cp] = globidx;
7291       cmapt[cp] = 2;
7292       cmapa[cp] = P_oth_idx;
7293       mptmp[cp] = PETSC_FALSE;
7294       cp++;
7295     }
7296     break;
7297   default:
7298     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7299   }
7300   /* sanity check */
7301   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7302 
7303   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7304   for (i = 0; i < cp; i++) {
7305     mmdata->mp[i]    = mp[i];
7306     mmdata->mptmp[i] = mptmp[i];
7307   }
7308   mmdata->cp = cp;
7309   C->product->data       = mmdata;
7310   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7311   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7312 
7313   /* memory type */
7314   mmdata->mtype = PETSC_MEMTYPE_HOST;
7315   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7316   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7317   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7318   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7319 
7320   /* prepare coo coordinates for values insertion */
7321 
7322   /* count total nonzeros of those intermediate seqaij Mats
7323     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7324     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7325     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7326   */
7327   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7328     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7329     if (mptmp[cp]) continue;
7330     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7331       const PetscInt *rmap = rmapa[cp];
7332       const PetscInt mr = mp[cp]->rmap->n;
7333       const PetscInt rs = C->rmap->rstart;
7334       const PetscInt re = C->rmap->rend;
7335       const PetscInt *ii  = mm->i;
7336       for (i = 0; i < mr; i++) {
7337         const PetscInt gr = rmap[i];
7338         const PetscInt nz = ii[i+1] - ii[i];
7339         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7340         else ncoo_oown += nz; /* this row is local */
7341       }
7342     } else ncoo_d += mm->nz;
7343   }
7344 
7345   /*
7346     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7347 
7348     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7349 
7350     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7351 
7352     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7353     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7354     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7355 
7356     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7357     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7358   */
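  /* Hypothetical illustration: with cp = 2 intermediate products, where mp[0] sends 3 nonzeros to other
     ranks and mp[1] sends 2, off[0] points to an index array of length 5 with off[1] = off[0]+3 and
     off[2] = off[0]+5; own[] is laid out the same way for locally inserted nonzeros */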
7359   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7360   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7361 
7362   /* gather (i,j) of nonzeros inserted by remote procs */
7363   if (hasoffproc) {
7364     PetscSF  msf;
7365     PetscInt ncoo2,*coo_i2,*coo_j2;
7366 
7367     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7368     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7369     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7370 
7371     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7372       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7373       PetscInt   *idxoff = mmdata->off[cp];
7374       PetscInt   *idxown = mmdata->own[cp];
7375       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7376         const PetscInt *rmap = rmapa[cp];
7377         const PetscInt *cmap = cmapa[cp];
7378         const PetscInt *ii  = mm->i;
7379         PetscInt       *coi = coo_i + ncoo_o;
7380         PetscInt       *coj = coo_j + ncoo_o;
7381         const PetscInt mr = mp[cp]->rmap->n;
7382         const PetscInt rs = C->rmap->rstart;
7383         const PetscInt re = C->rmap->rend;
7384         const PetscInt cs = C->cmap->rstart;
7385         for (i = 0; i < mr; i++) {
7386           const PetscInt *jj = mm->j + ii[i];
7387           const PetscInt gr  = rmap[i];
7388           const PetscInt nz  = ii[i+1] - ii[i];
7389           if (gr < rs || gr >= re) { /* this is an offproc row */
7390             for (j = ii[i]; j < ii[i+1]; j++) {
7391               *coi++ = gr;
7392               *idxoff++ = j;
7393             }
7394             if (!cmapt[cp]) { /* already global */
7395               for (j = 0; j < nz; j++) *coj++ = jj[j];
7396             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7397               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7398             } else { /* offdiag */
7399               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7400             }
7401             ncoo_o += nz;
7402           } else { /* this is a local row */
7403             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7404           }
7405         }
7406       }
7407       mmdata->off[cp + 1] = idxoff;
7408       mmdata->own[cp + 1] = idxown;
7409     }
7410 
7411     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7412     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7413     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7414     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7415     ncoo = ncoo_d + ncoo_oown + ncoo2;
7416     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7417     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7418     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7419     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7420     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7421     PetscCall(PetscFree2(coo_i,coo_j));
7422     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7423     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7424     coo_i = coo_i2;
7425     coo_j = coo_j2;
7426   } else { /* no offproc values insertion */
7427     ncoo = ncoo_d;
7428     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7429 
7430     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7431     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7432     PetscCall(PetscSFSetUp(mmdata->sf));
7433   }
7434   mmdata->hasoffproc = hasoffproc;
7435 
7436   /* gather (i,j) of nonzeros inserted locally */
7437   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7438     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7439     PetscInt       *coi = coo_i + ncoo_d;
7440     PetscInt       *coj = coo_j + ncoo_d;
7441     const PetscInt *jj  = mm->j;
7442     const PetscInt *ii  = mm->i;
7443     const PetscInt *cmap = cmapa[cp];
7444     const PetscInt *rmap = rmapa[cp];
7445     const PetscInt mr = mp[cp]->rmap->n;
7446     const PetscInt rs = C->rmap->rstart;
7447     const PetscInt re = C->rmap->rend;
7448     const PetscInt cs = C->cmap->rstart;
7449 
7450     if (mptmp[cp]) continue;
7451     if (rmapt[cp] == 1) { /* consecutive rows */
7452       /* fill coo_i */
7453       for (i = 0; i < mr; i++) {
7454         const PetscInt gr = i + rs;
7455         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7456       }
7457       /* fill coo_j */
7458       if (!cmapt[cp]) { /* type-0, already global */
7459         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7460       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7461         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7462       } else { /* type-2, local to global for sparse columns */
7463         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7464       }
7465       ncoo_d += mm->nz;
7466     } else if (rmapt[cp] == 2) { /* sparse rows */
7467       for (i = 0; i < mr; i++) {
7468         const PetscInt *jj = mm->j + ii[i];
7469         const PetscInt gr  = rmap[i];
7470         const PetscInt nz  = ii[i+1] - ii[i];
7471         if (gr >= rs && gr < re) { /* local rows */
7472           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7473           if (!cmapt[cp]) { /* type-0, already global */
7474             for (j = 0; j < nz; j++) *coj++ = jj[j];
7475           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7476             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7477           } else { /* type-2, local to global for sparse columns */
7478             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7479           }
7480           ncoo_d += nz;
7481         }
7482       }
7483     }
7484   }
7485   if (glob) {
7486     PetscCall(ISRestoreIndices(glob,&globidx));
7487   }
7488   PetscCall(ISDestroy(&glob));
7489   if (P_oth_l2g) {
7490     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7491   }
7492   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7493   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7494   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7495 
7496   /* preallocate with COO data */
7497   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7498   PetscCall(PetscFree2(coo_i,coo_j));
7499   PetscFunctionReturn(0);
7500 }
7501 
7502 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7503 {
7504   Mat_Product *product = mat->product;
7505 #if defined(PETSC_HAVE_DEVICE)
7506   PetscBool    match   = PETSC_FALSE;
7507   PetscBool    usecpu  = PETSC_FALSE;
7508 #else
7509   PetscBool    match   = PETSC_TRUE;
7510 #endif
7511 
7512   PetscFunctionBegin;
7513   MatCheckProduct(mat,1);
7514 #if defined(PETSC_HAVE_DEVICE)
7515   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7516     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7517   }
7518   if (match) { /* we can always fallback to the CPU if requested */
7519     switch (product->type) {
7520     case MATPRODUCT_AB:
7521       if (product->api_user) {
7522         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7523         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7524         PetscOptionsEnd();
7525       } else {
7526         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7527         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7528         PetscOptionsEnd();
7529       }
7530       break;
7531     case MATPRODUCT_AtB:
7532       if (product->api_user) {
7533         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7534         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7535         PetscOptionsEnd();
7536       } else {
7537         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7538         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7539         PetscOptionsEnd();
7540       }
7541       break;
7542     case MATPRODUCT_PtAP:
7543       if (product->api_user) {
7544         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7545         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7546         PetscOptionsEnd();
7547       } else {
7548         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7549         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7550         PetscOptionsEnd();
7551       }
7552       break;
7553     default:
7554       break;
7555     }
7556     match = (PetscBool)!usecpu;
7557   }
7558 #endif
7559   if (match) {
7560     switch (product->type) {
7561     case MATPRODUCT_AB:
7562     case MATPRODUCT_AtB:
7563     case MATPRODUCT_PtAP:
7564       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7565       break;
7566     default:
7567       break;
7568     }
7569   }
7570   /* fallback to MPIAIJ ops */
7571   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7572   PetscFunctionReturn(0);
7573 }
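/*
   Example usage (assumed; ./app stands for a hypothetical application): the options registered above let a
   user fall back to the CPU implementation per product type, e.g.

     ./app -matmatmult_backend_cpu -matptap_backend_cpu

   when MatMatMult()/MatPtAP() are called directly, or the MatProduct-style form

     ./app -mat_product_algorithm_backend_cpu

   when the MatProduct API is used.
*/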
7574 
/*
   Produces the set of block column indices of a matrix row, one for each block represented in the original row

   Amat - matrix, row - the (scalar) row to collapse, bs - block size
   n - the number of block indices returned in cc[]
   cc - the block indices (must be large enough to contain the indices)
*/
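/*
   Worked example (illustration only): with bs = 2 and a row whose scalar column indices are
   {0, 1, 4, 5, 8}, the division idx[j]/bs followed by deduplication yields cc = {0, 2, 4} and *n = 3.
   A minimal calling sketch, assuming an AIJ matrix A and a caller-provided buffer blkcols[] large
   enough for one row:

     PetscInt nblk,blkcols[64];                          // 64 is an arbitrary bound chosen for this sketch
     PetscCall(MatCollapseRow(A,0,2,&nblk,blkcols));     // block column indices of scalar row 0
*/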
7581 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7582 {
7583   PetscInt       cnt = -1,nidx,j;
7584   const PetscInt *idx;
7585 
7586   PetscFunctionBegin;
7587   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7588   if (nidx) {
7589     cnt = 0;
7590     cc[cnt] = idx[0]/bs;
7591     for (j=1; j<nidx; j++) {
7592       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7593     }
7594   }
7595   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7596   *n = cnt+1;
7597   PetscFunctionReturn(0);
7598 }
7599 
/*
    Produces a set of block column indices of a matrix block row, one for each block represented in the original set of rows

    Amat - matrix, start - first scalar row of the block row, bs - block size
    w0, w1, w2 - caller-provided work arrays, each large enough to hold the merged block indices of a block row
    ncollapsed - the number of block indices
    collapsed - the block indices (must be large enough to contain the indices)
*/
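/*
   Usage sketch (mirrors the preallocation loops later in this file): w0, w1, w2 must all be provided by
   the caller; maxnz below is an assumed upper bound on the merged block indices of one block row.

     PetscInt nblk,*w0,*w1,*w2;
     PetscCall(PetscMalloc3(maxnz,&w0,maxnz,&w1,maxnz,&w2));
     PetscCall(MatCollapseRows(Amat,0,bs,w0,w1,w2,&nblk,NULL));   // collapse scalar rows 0..bs-1
     PetscCall(PetscFree3(w0,w1,w2));
*/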
7606 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7607 {
7608   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7609 
7610   PetscFunctionBegin;
7611   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7612   for (i=start+1; i<start+bs; i++) {
7613     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7614     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7615     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7616   }
7617   *ncollapsed = nprev;
7618   if (collapsed) *collapsed  = cprev;
7619   PetscFunctionReturn(0);
7620 }
7621 
7622 /* -------------------------------------------------------------------------- */
/*
 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix

 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
- scale - scale with the diagonal

 Output Parameter:
. a_Gmat - output scalar graph with entries >= 0

 */
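/*
   Usage sketch (Amat stands for any assembled (MPI)AIJ matrix, possibly with block size > 1):

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat,PETSC_TRUE,PETSC_TRUE,&G)); // symmetrized, diagonally scaled scalar graph
     // ... use G, e.g. filter it with MatFilter_AIJ() ...
     PetscCall(MatDestroy(&G));
*/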
7635 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7636 {
7637   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7638   MPI_Comm       comm;
7639   Mat            Gmat;
7640   PetscBool      ismpiaij,isseqaij;
7641   Mat            a, b, c;
7642   MatType        jtype;
7643 
7644   PetscFunctionBegin;
7645   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7646   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7647   PetscCall(MatGetSize(Amat, &MM, &NN));
7648   PetscCall(MatGetBlockSize(Amat, &bs));
7649   nloc = (Iend-Istart)/bs;
7650 
7651   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7652   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7653   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7654 
7655   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, which each class could then
     implement efficiently */
7658   if (bs > 1) {
7659     PetscCall(MatGetType(Amat,&jtype));
7660     PetscCall(MatCreate(comm, &Gmat));
7661     PetscCall(MatSetType(Gmat, jtype));
7662     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7663     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7664     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7665       PetscInt  *d_nnz, *o_nnz;
7666       MatScalar *aa,val,AA[4096];
7667       PetscInt  *aj,*ai,AJ[4096],nc;
7668       if (isseqaij) { a = Amat; b = NULL; }
7669       else {
7670         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7671         a = d->A; b = d->B;
7672       }
7673       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7674       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7675       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7676         PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
7677         const PetscInt *cols;
7678         for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
7679           PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
7680           nnz[brow/bs] = jj/bs;
7681           if (jj%bs) ok = 0;
7682           if (cols) j0 = cols[0];
7683           else j0 = -1;
7684           PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
7685           if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
7686           for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
7687             PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
7688             if (jj%bs) ok = 0;
7689             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7690             if (nnz[brow/bs] != jj/bs) ok = 0;
7691             PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
7692           }
7693           if (!ok) {
7694             PetscCall(PetscFree2(d_nnz,o_nnz));
7695             goto old_bs;
7696           }
7697         }
        PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Fixed-size buffer (4096) too small: a block row has %" PetscInt_FMT " block columns",nmax);
7699       }
7700       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7701       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7702       PetscCall(PetscFree2(d_nnz,o_nnz));
7703       // diag
7704       for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
7705         Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
7706         ai = aseq->i;
7707         n  = ai[brow+1] - ai[brow];
7708         aj = aseq->j + ai[brow];
7709         for (int k=0; k<n; k += bs) { // block columns
7710           AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
7711           val = 0;
7712           for (int ii=0; ii<bs; ii++) { // rows in block
7713             aa = aseq->a + ai[brow+ii] + k;
7714             for (int jj=0; jj<bs; jj++) { // columns in block
7715               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7716             }
7717           }
7718           AA[k/bs] = val;
7719         }
7720         grow = Istart/bs + brow/bs;
7721         PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
7722       }
7723       // off-diag
7724       if (ismpiaij) {
7725         Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
7726         const PetscScalar *vals;
7727         const PetscInt    *cols, *garray = aij->garray;
7728         PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
7729         for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
7730           PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
7731           for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
7732             AA[k/bs] = 0;
7733             AJ[cidx] = garray[cols[k]]/bs;
7734           }
7735           nc = ncols/bs;
7736           PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
7737           for (int ii=0; ii<bs; ii++) { // rows in block
7738             PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
7739             for (int k=0; k<ncols; k += bs) {
7740               for (int jj=0; jj<bs; jj++) { // cols in block
7741                 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
7742               }
7743             }
7744             PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
7745           }
7746           grow = Istart/bs + brow/bs;
7747           PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
7748         }
7749       }
7750       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7751       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7752     } else {
7753       const PetscScalar *vals;
7754       const PetscInt    *idx;
7755       PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
7756       old_bs:
7757       /*
7758        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7759        */
7760       PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
7761       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7762       if (isseqaij) {
7763         PetscInt max_d_nnz;
7764         /*
7765          Determine exact preallocation count for (sequential) scalar matrix
7766          */
7767         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7768         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7769         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7770         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7771           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7772         }
7773         PetscCall(PetscFree3(w0,w1,w2));
7774       } else if (ismpiaij) {
7775         Mat            Daij,Oaij;
7776         const PetscInt *garray;
7777         PetscInt       max_d_nnz;
7778         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7779         /*
7780          Determine exact preallocation count for diagonal block portion of scalar matrix
7781          */
7782         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7783         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7784         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7785         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7786           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7787         }
7788         PetscCall(PetscFree3(w0,w1,w2));
        /*
         Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
         */
7792         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7793           o_nnz[jj] = 0;
7794           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7795             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7796             o_nnz[jj] += ncols;
7797             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7798           }
7799           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7800         }
7801       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7802       /* get scalar copy (norms) of matrix */
7803       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7804       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7805       PetscCall(PetscFree2(d_nnz,o_nnz));
7806       for (Ii = Istart; Ii < Iend; Ii++) {
7807         PetscInt dest_row = Ii/bs;
7808         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7809         for (jj=0; jj<ncols; jj++) {
7810           PetscInt    dest_col = idx[jj]/bs;
7811           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7812           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7813         }
7814         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7815       }
7816       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7817       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7818     }
7819   } else {
7820     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7821      procedure via MatAbs API */
7822     /* just copy scalar matrix & abs() */
7823     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7824     if (isseqaij) { a = Gmat; b = NULL; }
7825     else {
7826       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7827       a = d->A; b = d->B;
7828     }
7829     /* abs */
7830     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7831       MatInfo     info;
7832       PetscScalar *avals;
7833       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7834       PetscCall(MatSeqAIJGetArray(c,&avals));
7835       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7836       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7837     }
7838   }
7839   if (symmetrize) {
7840     PetscBool issym;
7841     PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
7842     if (!issym) {
7843       Mat matTrans;
7844       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7845       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7846       PetscCall(MatDestroy(&matTrans));
7847     }
7848     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7849   } else {
7850     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7851   }
7852   if (scale) {
    /* symmetrically scale Gmat so that all diagonal values become 1 or -1 */
7854     Vec               diag;
7855     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7856     PetscCall(MatGetDiagonal(Gmat, diag));
7857     PetscCall(VecReciprocal(diag));
7858     PetscCall(VecSqrtAbs(diag));
7859     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7860     PetscCall(VecDestroy(&diag));
7861   }
7862   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7863   *a_Gmat = Gmat;
7864   PetscFunctionReturn(0);
7865 }
7866 
7867 /* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - filter out entries of the graph with small absolute value
     With vfilter < 0 nothing is filtered, so this should not be called in that case.

   Collective on Mat

   Input Parameters:
+   Gmat - the graph
-   vfilter - threshold parameter in [0,1)

   Output Parameter:
.   filteredG - output filtered scalar graph

   Level: developer

   Notes:
    This is called before graph coarseners are applied.
    This could be moved into Mat; 'symm' could be moved to GAMG.

.seealso: `PCGAMGSetThreshold()`
@*/
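/*
   Usage sketch (G is assumed to be a scalar graph, e.g. the output of MatCreateGraph_Simple_AIJ(),
   and 0.01 is an arbitrary example threshold):

     Mat Gfilt;
     PetscCall(MatFilter_AIJ(G,0.01,&Gfilt));  // keep only entries with absolute value > 0.01
     PetscCall(MatDestroy(&G));
     G = Gfilt;
*/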
7889 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7890 {
7891   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7892   Mat               tGmat;
7893   MPI_Comm          comm;
7894   const PetscScalar *vals;
7895   const PetscInt    *idx;
7896   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7897   MatScalar         *AA; // this is checked in graph
7898   PetscBool         isseqaij;
7899   Mat               a, b, c;
7900   MatType           jtype;
7901 
7902   PetscFunctionBegin;
7903   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7904   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7905   PetscCall(MatGetType(Gmat,&jtype));
7906   PetscCall(MatCreate(comm, &tGmat));
7907   PetscCall(MatSetType(tGmat, jtype));
7908 
7909   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7910                Also, if the matrix is symmetric, can we skip this
7911                operation? It can be very expensive on large matrices. */
7912 
7913   // global sizes
7914   PetscCall(MatGetSize(Gmat, &MM, &NN));
7915   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7916   nloc = Iend - Istart;
7917   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7918   if (isseqaij) { a = Gmat; b = NULL; }
7919   else {
7920     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7921     a = d->A; b = d->B;
7922     garray = d->garray;
7923   }
7924   /* Determine upper bound on non-zeros needed in new filtered matrix */
7925   for (PetscInt row=0; row < nloc; row++) {
7926     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7927     d_nnz[row] = ncols;
7928     if (ncols>maxcols) maxcols=ncols;
7929     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7930   }
7931   if (b) {
7932     for (PetscInt row=0; row < nloc; row++) {
7933       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7934       o_nnz[row] = ncols;
7935       if (ncols>maxcols) maxcols=ncols;
7936       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7937     }
7938   }
7939   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7940   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7941   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7942   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7943   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7944   PetscCall(PetscFree2(d_nnz,o_nnz));
7945   //
7946   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7947   nnz0 = nnz1 = 0;
7948   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7949     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7950       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7951       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7952         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7953         if (PetscRealPart(sv) > vfilter) {
7954           nnz1++;
7955           PetscInt cid = idx[jj] + Istart; //diag
7956           if (c!=a) cid = garray[idx[jj]];
7957           AA[ncol_row] = vals[jj];
7958           AJ[ncol_row] = cid;
7959           ncol_row++;
7960         }
7961       }
7962       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7963       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7964     }
7965   }
7966   PetscCall(PetscFree2(AA,AJ));
7967   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7968   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7969   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7970 
7971   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7972                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7973                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7974 
7975   *filteredG = tGmat;
7976   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7977   PetscFunctionReturn(0);
7978 }
7979 
7980 /*
7981     Special version for direct calls from Fortran
7982 */
7983 #include <petsc/private/fortranimpl.h>
7984 
/* Change these macros so they can be used in a void function */
7986 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7987 #undef  PetscCall
7988 #define PetscCall(...) do {                                                                    \
7989     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7990     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7991       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7992       return;                                                                                  \
7993     }                                                                                          \
7994   } while (0)
7995 
7996 #undef SETERRQ
7997 #define SETERRQ(comm,ierr,...) do {                                                            \
7998     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7999     return;                                                                                    \
8000   } while (0)
8001 
8002 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8003 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8004 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8005 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8006 #else
8007 #endif
8008 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
8009 {
8010   Mat          mat  = *mmat;
8011   PetscInt     m    = *mm, n = *mn;
8012   InsertMode   addv = *maddv;
8013   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
8014   PetscScalar  value;
8015 
8016   MatCheckPreallocated(mat,1);
8017   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8018   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
8019   {
8020     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
8021     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
8022     PetscBool roworiented = aij->roworiented;
8023 
8024     /* Some Variables required in the macro */
8025     Mat        A                    = aij->A;
8026     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
8027     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
8028     MatScalar  *aa;
8029     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8030     Mat        B                    = aij->B;
8031     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
8032     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
8033     MatScalar  *ba;
8034     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8035      * cannot use "#if defined" inside a macro. */
8036     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8037 
8038     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
8039     PetscInt  nonew = a->nonew;
8040     MatScalar *ap1,*ap2;
8041 
8042     PetscFunctionBegin;
8043     PetscCall(MatSeqAIJGetArray(A,&aa));
8044     PetscCall(MatSeqAIJGetArray(B,&ba));
8045     for (i=0; i<m; i++) {
8046       if (im[i] < 0) continue;
8047       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
8048       if (im[i] >= rstart && im[i] < rend) {
8049         row      = im[i] - rstart;
8050         lastcol1 = -1;
8051         rp1      = aj + ai[row];
8052         ap1      = aa + ai[row];
8053         rmax1    = aimax[row];
8054         nrow1    = ailen[row];
8055         low1     = 0;
8056         high1    = nrow1;
8057         lastcol2 = -1;
8058         rp2      = bj + bi[row];
8059         ap2      = ba + bi[row];
8060         rmax2    = bimax[row];
8061         nrow2    = bilen[row];
8062         low2     = 0;
8063         high2    = nrow2;
8064 
8065         for (j=0; j<n; j++) {
8066           if (roworiented) value = v[i*n+j];
8067           else value = v[i+j*m];
8068           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8069           if (in[j] >= cstart && in[j] < cend) {
8070             col = in[j] - cstart;
8071             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
8072           } else if (in[j] < 0) continue;
8073           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8074             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
8075             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
8076           } else {
8077             if (mat->was_assembled) {
8078               if (!aij->colmap) {
8079                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8080               }
8081 #if defined(PETSC_USE_CTABLE)
8082               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
8083               col--;
8084 #else
8085               col = aij->colmap[in[j]] - 1;
8086 #endif
8087               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
8088                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8089                 col  =  in[j];
8090                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8091                 B        = aij->B;
8092                 b        = (Mat_SeqAIJ*)B->data;
8093                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
8094                 rp2      = bj + bi[row];
8095                 ap2      = ba + bi[row];
8096                 rmax2    = bimax[row];
8097                 nrow2    = bilen[row];
8098                 low2     = 0;
8099                 high2    = nrow2;
8100                 bm       = aij->B->rmap->n;
8101                 ba       = b->a;
8102                 inserted = PETSC_FALSE;
8103               }
8104             } else col = in[j];
8105             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
8106           }
8107         }
8108       } else if (!aij->donotstash) {
8109         if (roworiented) {
8110           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8111         } else {
8112           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8113         }
8114       }
8115     }
8116     PetscCall(MatSeqAIJRestoreArray(A,&aa));
8117     PetscCall(MatSeqAIJRestoreArray(B,&ba));
8118   }
8119   PetscFunctionReturnVoid();
8120 }
8121 
8122 /* Undefining these here since they were redefined from their original definition above! No
8123  * other PETSc functions should be defined past this point, as it is impossible to recover the
8124  * original definitions */
8125 #undef PetscCall
8126 #undef SETERRQ
8127