xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2292213e75f50eade7dffbb625f9e7b8550bf661)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62 
63   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
64    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
65    * to differ from the parent matrix. */
66   if (a->lvec) {
67     ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
68   }
69   if (a->diag) {
70     ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
77 {
78   PetscErrorCode ierr;
79   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
80 
81   PetscFunctionBegin;
82   if (mat->A) {
83     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
84     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
90 {
91   PetscErrorCode  ierr;
92   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
93   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
94   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
95   const PetscInt  *ia,*ib;
96   const MatScalar *aa,*bb,*aav,*bav;
97   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
98   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
99 
100   PetscFunctionBegin;
101   *keptrows = NULL;
102 
103   ia   = a->i;
104   ib   = b->i;
105   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
106   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
107   for (i=0; i<m; i++) {
108     na = ia[i+1] - ia[i];
109     nb = ib[i+1] - ib[i];
110     if (!na && !nb) {
111       cnt++;
112       goto ok1;
113     }
114     aa = aav + ia[i];
115     for (j=0; j<na; j++) {
116       if (aa[j] != 0.0) goto ok1;
117     }
118     bb = bav + ib[i];
119     for (j=0; j <nb; j++) {
120       if (bb[j] != 0.0) goto ok1;
121     }
122     cnt++;
123 ok1:;
124   }
125   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
126   if (!n0rows) {
127     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
128     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
129     PetscFunctionReturn(0);
130   }
131   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
132   cnt  = 0;
133   for (i=0; i<m; i++) {
134     na = ia[i+1] - ia[i];
135     nb = ib[i+1] - ib[i];
136     if (!na && !nb) continue;
137     aa = aav + ia[i];
138     for (j=0; j<na;j++) {
139       if (aa[j] != 0.0) {
140         rows[cnt++] = rstart + i;
141         goto ok2;
142       }
143     }
144     bb = bav + ib[i];
145     for (j=0; j<nb; j++) {
146       if (bb[j] != 0.0) {
147         rows[cnt++] = rstart + i;
148         goto ok2;
149       }
150     }
151 ok2:;
152   }
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
154   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
155   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
156   PetscFunctionReturn(0);
157 }
158 
159 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
160 {
161   PetscErrorCode    ierr;
162   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
163   PetscBool         cong;
164 
165   PetscFunctionBegin;
166   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
167   if (Y->assembled && cong) {
168     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
169   } else {
170     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
176 {
177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
178   PetscErrorCode ierr;
179   PetscInt       i,rstart,nrows,*rows;
180 
181   PetscFunctionBegin;
182   *zrows = NULL;
183   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
184   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
185   for (i=0; i<nrows; i++) rows[i] += rstart;
186   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
/*
   Compute a per-column reduction over the whole parallel matrix, returning the
   result for ALL global columns in reductions[] on every process (local
   contributions are combined with an Allreduce).

   type is one of NORM_1, NORM_2, NORM_INFINITY, REDUCTION_SUM_REALPART,
   REDUCTION_MEAN_REALPART, REDUCTION_SUM_IMAGINARYPART or
   REDUCTION_MEAN_IMAGINARYPART; anything else raises PETSC_ERR_ARG_WRONG.
   reductions must have room for one PetscReal per global column.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray; /* garray maps B's local column numbers to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); /* one zero-initialized accumulator per global column */
  /* Get/restore without using the returned pointer: presumably this syncs device
     values to the host before a_aij->a / b_aij->a are read directly below — TODO confirm */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; the square root is taken after the reduction */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine per-process contributions: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(0);
}
256 
257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
258 {
259   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
260   IS              sis,gis;
261   PetscErrorCode  ierr;
262   const PetscInt  *isis,*igis;
263   PetscInt        n,*iis,nsis,ngis,rstart,i;
264 
265   PetscFunctionBegin;
266   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
267   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
268   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
269   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
270   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
271   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
274   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
275   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
276   n    = ngis + nsis;
277   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
278   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
279   for (i=0; i<n; i++) iis[i] += rstart;
280   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
281 
282   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
283   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
284   ierr = ISDestroy(&sis);CHKERRQ(ierr);
285   ierr = ISDestroy(&gis);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
/*
   Build aij->colmap: for each global column present in the off-diagonal block B,
   map the global column index to (local column index in B) + 1, so that 0 (or a
   failed table lookup) means "column not present in B".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i; /* number of distinct global columns used by B */

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table keyed by global column + 1 (table keys must be positive) */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array of length cmap->N+1: fast lookup but not memory scalable */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; /* +1 so 0 marks "absent" */
#endif
  PetscFunctionReturn(0);
}
316 
/*
   Insert or add a single value at (row,col) of the diagonal block A: binary-search
   the (sorted) row for col, update in place when found, otherwise grow the row
   (reallocating if needed) and shift later entries up.  Relies on locals of the
   enclosing MatSetValues_MPIAIJ(): rp1/ap1 (row column indices/values), low1/high1
   (search window), nrow1, rmax1, lastcol1, ailen, nonew, ignorezeroentries, ierr.
   orow/ocol are the global row/column used only in error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
353 
/*
   Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using
   the "2"-suffixed locals (rp2/ap2, low2/high2, nrow2, rmax2, lastcol2, bilen) of
   the enclosing MatSetValues_MPIAIJ().  Note: unlike the A variant, a zero value
   is skipped even on the diagonal (row == col check is absent here).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
389 
/*
   Replace the stored values of one locally owned row (global index `row`) with the
   contiguous array v.  v lists the row's values in global column order:
   [off-diagonal entries left of the diagonal block | diagonal-block entries |
   remaining off-diagonal entries].  The nonzero pattern of the row is unchanged.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); /* for a square matrix rstart is also the first diagonal-block column */
  row  = row - diag; /* convert to local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* NOTE(review): assumes B's columns are sorted by global index within the row — confirm */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
    ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr);
    ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr);
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
    ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
428 
429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
430 {
431   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
432   PetscScalar    value = 0.0;
433   PetscErrorCode ierr;
434   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
435   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
436   PetscBool      roworiented = aij->roworiented;
437 
438   /* Some Variables required in the macro */
439   Mat        A                    = aij->A;
440   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
441   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
442   PetscBool  ignorezeroentries    = a->ignorezeroentries;
443   Mat        B                    = aij->B;
444   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
445   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
446   MatScalar  *aa,*ba;
447   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
448   PetscInt   nonew;
449   MatScalar  *ap1,*ap2;
450 
451   PetscFunctionBegin;
452   ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
453   ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
457     if (im[i] >= rstart && im[i] < rend) {
458       row      = im[i] - rstart;
459       lastcol1 = -1;
460       rp1      = aj + ai[row];
461       ap1      = aa + ai[row];
462       rmax1    = aimax[row];
463       nrow1    = ailen[row];
464       low1     = 0;
465       high1    = nrow1;
466       lastcol2 = -1;
467       rp2      = bj + bi[row];
468       ap2      = ba + bi[row];
469       rmax2    = bimax[row];
470       nrow2    = bilen[row];
471       low2     = 0;
472       high2    = nrow2;
473 
474       for (j=0; j<n; j++) {
475         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
476         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
477         if (in[j] >= cstart && in[j] < cend) {
478           col   = in[j] - cstart;
479           nonew = a->nonew;
480           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
481         } else if (in[j] < 0) continue;
482         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
483         else {
484           if (mat->was_assembled) {
485             if (!aij->colmap) {
486               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
487             }
488 #if defined(PETSC_USE_CTABLE)
489             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */
490             col--;
491 #else
492             col = aij->colmap[in[j]] - 1;
493 #endif
494             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
495               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */
496               col  =  in[j];
497               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
498               B        = aij->B;
499               b        = (Mat_SeqAIJ*)B->data;
500               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
501               rp2      = bj + bi[row];
502               ap2      = ba + bi[row];
503               rmax2    = bimax[row];
504               nrow2    = bilen[row];
505               low2     = 0;
506               high2    = nrow2;
507               bm       = aij->B->rmap->n;
508               ba       = b->a;
509             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
510               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
511                 ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
512               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
513             }
514           } else col = in[j];
515           nonew = b->nonew;
516           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
517         }
518       }
519     } else {
520       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
521       if (!aij->donotstash) {
522         mat->assembled = PETSC_FALSE;
523         if (roworiented) {
524           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
532   ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 /*
537     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
538     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
540 */
541 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
542 {
543   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
544   Mat            A           = aij->A; /* diagonal part of the matrix */
545   Mat            B           = aij->B; /* offdiagonal part of the matrix */
546   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
547   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
548   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
549   PetscInt       *ailen      = a->ilen,*aj = a->j;
550   PetscInt       *bilen      = b->ilen,*bj = b->j;
551   PetscInt       am          = aij->A->rmap->n,j;
552   PetscInt       diag_so_far = 0,dnz;
553   PetscInt       offd_so_far = 0,onz;
554 
555   PetscFunctionBegin;
556   /* Iterate over all rows of the matrix */
557   for (j=0; j<am; j++) {
558     dnz = onz = 0;
559     /*  Iterate over all non-zero columns of the current row */
560     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
561       /* If column is in the diagonal */
562       if (mat_j[col] >= cstart && mat_j[col] < cend) {
563         aj[diag_so_far++] = mat_j[col] - cstart;
564         dnz++;
565       } else { /* off-diagonal entries */
566         bj[offd_so_far++] = mat_j[col];
567         onz++;
568       }
569     }
570     ailen[j] = dnz;
571     bilen[j] = onz;
572   }
573   PetscFunctionReturn(0);
574 }
575 
576 /*
577     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
578     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
580     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
581     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
582 */
583 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
584 {
585   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
586   Mat            A      = aij->A; /* diagonal part of the matrix */
587   Mat            B      = aij->B; /* offdiagonal part of the matrix */
588   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
589   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
590   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
591   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
592   PetscInt       *ailen = a->ilen,*aj = a->j;
593   PetscInt       *bilen = b->ilen,*bj = b->j;
594   PetscInt       am     = aij->A->rmap->n,j;
595   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
596   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
597   PetscScalar    *aa = a->a,*ba = b->a;
598 
599   PetscFunctionBegin;
600   /* Iterate over all rows of the matrix */
601   for (j=0; j<am; j++) {
602     dnz_row = onz_row = 0;
603     rowstart_offd = full_offd_i[j];
604     rowstart_diag = full_diag_i[j];
605     /*  Iterate over all non-zero columns of the current row */
606     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
607       /* If column is in the diagonal */
608       if (mat_j[col] >= cstart && mat_j[col] < cend) {
609         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
610         aa[rowstart_diag+dnz_row] = mat_a[col];
611         dnz_row++;
612       } else { /* off-diagonal entries */
613         bj[rowstart_offd+onz_row] = mat_j[col];
614         ba[rowstart_offd+onz_row] = mat_a[col];
615         onz_row++;
616       }
617     }
618     ailen[j] = dnz_row;
619     bilen[j] = onz_row;
620   }
621   PetscFunctionReturn(0);
622 }
623 
/*
   Retrieve entries v[i*n+j] = mat(idxm[i],idxn[j]).  Only locally owned rows may
   be queried (off-process rows raise PETSC_ERR_SUP).  Negative row/column indices
   are skipped, leaving the corresponding entries of v untouched; columns not
   stored in the off-diagonal block yield 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* owned column: value lives in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to B's local numbering */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; /* column not present in B */
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
663 
664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
665 {
666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
667   PetscErrorCode ierr;
668   PetscInt       nstash,reallocs;
669 
670   PetscFunctionBegin;
671   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
672 
673   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly of a parallel AIJ matrix: drains
   the stash of entries destined for this rank, assembles the diagonal (A) and
   off-diagonal (B) sequential blocks, handles disassembly coordination across
   ranks, and sets up the multiply machinery on first final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive all messages of stashed entries sent to this rank and insert them */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD on booleans acts as a logical AND: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build colmap/garray/lvec/Mvctx for MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop row-access scratch space; it is lazily rebuilt when MatGetRow is next used */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
760 
761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
762 {
763   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
768   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
/*
   MatZeroRows_MPIAIJ - Zeros the locally owned portions of the given global
   rows, optionally placing 'diag' on the diagonal and adjusting the right-hand
   side b (given a solution vector x) so x remains consistent on those rows.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    /* each zeroed row reduces to diag*x_i = b_i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* record nonzero states so pattern changes can be detected after the zeroing */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save 'nonew' flags; they are temporarily cleared to permit inserting new diagonal entries */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows past the global column count have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the original insertion-policy flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
846 
/*
   MatZeroRowsColumns_MPIAIJ - Zeros the given global rows and the matching
   columns, optionally placing 'diag' on the diagonal; when x and b are
   provided, b is corrected for the eliminated off-diagonal column entries.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build a 0/1 mask over local rows and scatter it so every rank knows which ghost columns are zeroed */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* ghost values of x are needed to correct b for the eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr);
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the eliminated column's contribution to the right-hand side before zeroing it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
966 
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971   PetscInt       nt;
972   VecScatter     Mvctx = a->Mvctx;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
977   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   VecScatter     Mvctx = a->Mvctx;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1009 {
1010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   /* do nondiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   /* do local part */
1017   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1018   /* add partial results together */
1019   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1025 {
1026   MPI_Comm       comm;
1027   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1028   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1029   IS             Me,Notme;
1030   PetscErrorCode ierr;
1031   PetscInt       M,N,first,last,*notme,i;
1032   PetscBool      lf;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: symmetric diagonal block */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1039   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1067 {
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   /* do nondiagonal part */
1082   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1083   /* do local part */
1084   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1085   /* add partial results together */
1086   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1087   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /*
1092   This only works correctly for square matrices where the subblock A->A is the
1093    diagonal block
1094 */
1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1096 {
1097   PetscErrorCode ierr;
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099 
1100   PetscFunctionBegin;
1101   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1102   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1103   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1114   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1119 {
1120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124 #if defined(PETSC_USE_LOG)
1125   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1126 #endif
1127   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1130   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1131 #if defined(PETSC_USE_CTABLE)
1132   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1133 #else
1134   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1135 #endif
1136   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1137   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1139   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1140   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1141 
1142   /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1143   ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr);
1144   ierr = PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr);
1145   ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr);
1146   ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr);
1147   ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr);
1148   ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr);
1149 
1150   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1151 
1152   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1153   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1154 
1155   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1165 #if defined(PETSC_HAVE_CUDA)
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1167 #endif
1168 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1170 #endif
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1172 #if defined(PETSC_HAVE_ELEMENTAL)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1174 #endif
1175 #if defined(PETSC_HAVE_SCALAPACK)
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1177 #endif
1178 #if defined(PETSC_HAVE_HYPRE)
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1188 #if defined(PETSC_HAVE_MKL_SPARSE)
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1190 #endif
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1193   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1200 {
1201   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1202   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1203   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1204   const PetscInt    *garray = aij->garray;
1205   const PetscScalar *aa,*ba;
1206   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1207   PetscInt          *rowlens;
1208   PetscInt          *colidxs;
1209   PetscScalar       *matvals;
1210   PetscErrorCode    ierr;
1211 
1212   PetscFunctionBegin;
1213   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1214 
1215   M  = mat->rmap->N;
1216   N  = mat->cmap->N;
1217   m  = mat->rmap->n;
1218   rs = mat->rmap->rstart;
1219   cs = mat->cmap->rstart;
1220   nz = A->nz + B->nz;
1221 
1222   /* write matrix header */
1223   header[0] = MAT_FILE_CLASSID;
1224   header[1] = M; header[2] = N; header[3] = nz;
1225   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1226   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1227 
1228   /* fill in and store row lengths  */
1229   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1230   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1231   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1232   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1233 
1234   /* fill in and store column indices */
1235   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1236   for (cnt=0, i=0; i<m; i++) {
1237     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       colidxs[cnt++] = garray[B->j[jb]];
1240     }
1241     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1242       colidxs[cnt++] = A->j[ja] + cs;
1243     for (; jb<B->i[i+1]; jb++)
1244       colidxs[cnt++] = garray[B->j[jb]];
1245   }
1246   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1247   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1248   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1249 
1250   /* fill in and store nonzero values */
1251   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1252   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1253   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1254   for (cnt=0, i=0; i<m; i++) {
1255     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1256       if (garray[B->j[jb]] > cs) break;
1257       matvals[cnt++] = ba[jb];
1258     }
1259     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1260       matvals[cnt++] = aa[ja];
1261     for (; jb<B->i[i+1]; jb++)
1262       matvals[cnt++] = ba[jb];
1263   }
1264   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1265   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1266   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1267   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1268   ierr = PetscFree(matvals);CHKERRQ(ierr);
1269 
1270   /* write block size option to the viewer's .info file */
1271   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Views an MPIAIJ matrix through an
   ASCII, draw, or socket viewer. Summary ASCII formats are handled per rank;
   otherwise the whole matrix is gathered onto rank 0 and viewed there.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes and inode usage */
      MatInfo   info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on this type */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols; every other rank requests none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (rank == 0) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1404 
1405 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1406 {
1407   PetscErrorCode ierr;
1408   PetscBool      iascii,isdraw,issocket,isbinary;
1409 
1410   PetscFunctionBegin;
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1415   if (iascii || isdraw || isbinary || issocket) {
1416     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
/*
   MatSOR_MPIAIJ - SOR / Eisenstat relaxation for an MPIAIJ matrix.

   Only "local" SOR variants are supported in parallel: each outer iteration
   applies the sequential SOR kernel of the diagonal block A, while the
   off-diagonal block B enters through a modified right-hand side
   bb1 = bb - B*x, with the ghosted values of x gathered via the Mvctx scatter.
   A fully coupled parallel SOR is rejected with an error at the end.

   Parameters follow MatSOR(); see MatSORType for the meaning of the flag bits.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;   /* work vector for bb - B*x; created only when needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply upper-triangular part of the diagonal block only */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* NOTE: "~flag & SOR_ZERO_INITIAL_GUESS" intentionally tests that the
     zero-initial-guess bit is NOT set (~ binds tighter than &).  bb1 is
     required whenever a sweep must fold in an existing x, or for Eisenstat. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep may use bb directly since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather ghost values of x needed by the off-diagonal block */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat trick: one local backward sweep into xx ... */
    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the global diagonal for the pointwise fallback below */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* fold in the off-diagonal contribution from the ghosted xx */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any zero-pivot/diagonal problem detected in the local kernel */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1520 
/*
   MatPermute_MPIAIJ - Create B = A with rows permuted by rowp and columns by colp.

   Strategy: invert the row/column permutations with PetscSF reductions so each
   process learns the destination index of every local row/column, compute exact
   diagonal/off-diagonal preallocation counts for the permuted matrix, then insert
   the values row by row with MatSetValues().  The final matrix is returned in *B.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;  /* stays NULL in this code path; guarded destroy below is then a no-op */
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  /* work: scratch of length max(m,n); rdest/cdest: permuted global index of each local row/column */
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count, per local source row, how many entries land in the diagonal (dnnz)
     vs off-diagonal (onnz) block of the permuted matrix; tdnnz/tonnz receive
     the counts at the destination rows */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the processes owning the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1627 
1628 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1629 {
1630   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1631   PetscErrorCode ierr;
1632 
1633   PetscFunctionBegin;
1634   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1635   if (ghosts) *ghosts = aij->garray;
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1640 {
1641   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1642   Mat            A    = mat->A,B = mat->B;
1643   PetscErrorCode ierr;
1644   PetscLogDouble isend[5],irecv[5];
1645 
1646   PetscFunctionBegin;
1647   info->block_size = 1.0;
1648   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1649 
1650   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1651   isend[3] = info->memory;  isend[4] = info->mallocs;
1652 
1653   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1654 
1655   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;  isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
/*
   MatSetOption_MPIAIJ - Apply a matrix option to an MPIAIJ matrix.

   Options affecting the nonzero structure are forwarded to both sequential
   blocks A and B; a few options update local bookkeeping instead, and the
   symmetry flags are handled entirely by the MatSetOption() front end.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* remember orientation locally for MatSetValues, and forward to the blocks */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip the stash/communication of off-process values during assembly */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1737 
/*
   MatGetRow_MPIAIJ - Return one locally owned row of the matrix, merged from
   the diagonal (A) and off-diagonal (B) blocks into a single run sorted by
   increasing global column index.

   The merge relies on each block's row already being column-sorted: the B
   entries whose global columns precede the diagonal range come first (their
   count is imark), then all A entries (shifted by cstart to global numbering),
   then the remaining B entries.  Output arrays live in the per-matrix
   rowvalues/rowindices scratch, so only one row can be "active" at a time;
   MatRestoreRow_MPIAIJ() releases it.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request columns/values from the blocks only when the caller wants them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;  /* maps local B columns to global column numbers */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;  /* number of B entries whose global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already determined in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1815 
1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1817 {
1818   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1819 
1820   PetscFunctionBegin;
1821   PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1822   aij->getrowactive = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
/*
   MatNorm_MPIAIJ - Compute a matrix norm of an MPIAIJ matrix.

   Supports NORM_FROBENIUS (sum of |a_ij|^2, reduced with MPI sum), NORM_1
   (max column sum, accumulated into a global-length column array and reduced),
   and NORM_INFINITY (max row sum, reduced with MPI max).  The two-norm is not
   supported.  On a single process the sequential MatNorm() of the diagonal
   block is used directly.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode  ierr;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr);
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates each global column's |a_ij| sum; note this is O(N) storage per process */
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps local B column indices to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* sum |a_ij| across both blocks of local row j */
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1896 
/*
   MatTranspose_MPIAIJ - Form the transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place, *matout == A) the result matrix is
   created with exact preallocation: d_nnz comes from counting the diagonal
   block's column occurrences, o_nnz from reducing the off-diagonal column
   counts to their owning processes via a PetscSF over the column layout.
   The diagonal block is then transposed locally in one shot, while the
   off-diagonal entries are communicated through MatSetValues() with rows
   and columns swapped.  MAT_INPLACE_MATRIX merges the result back into A.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* the transpose has A's column layout as rows and row layout as columns */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    /* reuse an existing transpose; any new nonzero would indicate a pattern mismatch */
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate B's compressed local columns to global columns up front */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of B as column "row" of the transpose (note swapped row/col arguments) */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1987 
1988 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1989 {
1990   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1991   Mat            a    = aij->A,b = aij->B;
1992   PetscErrorCode ierr;
1993   PetscInt       s1,s2,s3;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1997   if (rr) {
1998     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1999     PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2000     /* Overlap communication with computation. */
2001     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2002   }
2003   if (ll) {
2004     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2005     PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2006     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2007   }
2008   /* scale  the diagonal block */
2009   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2010 
2011   if (rr) {
2012     /* Do a scatter end and then right scale the off-diagonal block */
2013     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2014     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2020 {
2021   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2030 {
2031   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2032   Mat            a,b,c,d;
2033   PetscBool      flg;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   a = matA->A; b = matA->B;
2038   c = matB->A; d = matB->B;
2039 
2040   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2041   if (flg) {
2042     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2043   }
2044   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2049 {
2050   PetscErrorCode ierr;
2051   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2052   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2053 
2054   PetscFunctionBegin;
2055   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2056   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2057     /* because of the column compression in the off-processor part of the matrix a->B,
2058        the number of columns in a->B and b->B may be different, hence we cannot call
2059        the MatCopy() directly on the two parts. If need be, we can provide a more
2060        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2061        then copying the submatrices */
2062     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2063   } else {
2064     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2065     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2066   }
2067   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2068   PetscFunctionReturn(0);
2069 }
2070 
/*
   MatSetUp_MPIAIJ - Default setup: preallocate with default (PETSC_DEFAULT)
   per-row nonzero estimates for both diagonal and off-diagonal blocks.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2079 
2080 /*
2081    Computes the number of nonzeros per row needed for preallocation when X and Y
2082    have different nonzero structure.
2083 */
2084 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2085 {
2086   PetscInt       i,j,k,nzx,nzy;
2087 
2088   PetscFunctionBegin;
2089   /* Set the number of nonzeros in the new matrix */
2090   for (i=0; i<m; i++) {
2091     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2092     nzx = xi[i+1] - xi[i];
2093     nzy = yi[i+1] - yi[i];
2094     nnz[i] = 0;
2095     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2096       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2097       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2098       nnz[i]++;
2099     }
2100     for (; k<nzy; k++) nnz[i]++;
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2106 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2107 {
2108   PetscErrorCode ierr;
2109   PetscInt       m = Y->rmap->N;
2110   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2111   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2122 
2123   PetscFunctionBegin;
2124   if (str == SAME_NONZERO_PATTERN) {
2125     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2126     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2127   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2128     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2129   } else {
2130     Mat      B;
2131     PetscInt *nnz_d,*nnz_o;
2132 
2133     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2134     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2135     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2136     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2137     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2138     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2139     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2140     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2141     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2142     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2143     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
2144     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2145     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2151 
2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2153 {
2154 #if defined(PETSC_USE_COMPLEX)
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2157 
2158   PetscFunctionBegin;
2159   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2160   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2161 #else
2162   PetscFunctionBegin;
2163 #endif
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2174   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2179 {
2180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2185   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2190 {
2191   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2192   PetscErrorCode    ierr;
2193   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2194   PetscScalar       *va,*vv;
2195   Vec               vB,vA;
2196   const PetscScalar *vb;
2197 
2198   PetscFunctionBegin;
2199   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2200   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2201 
2202   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2203   if (idx) {
2204     for (i=0; i<m; i++) {
2205       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2206     }
2207   }
2208 
2209   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2210   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2211   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2212 
2213   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2214   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2215   for (i=0; i<m; i++) {
2216     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2217       vv[i] = vb[i];
2218       if (idx) idx[i] = a->garray[idxb[i]];
2219     } else {
2220       vv[i] = va[i];
2221       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2222         idx[i] = a->garray[idxb[i]];
2223     }
2224   }
2225   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2226   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2227   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2228   ierr = PetscFree(idxb);CHKERRQ(ierr);
2229   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2230   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
/* v[r] = entry of row r with smallest magnitude, where implicit (unstored) zeros in
   the off-diagonal block count as 0.0; if idx is non-NULL, idx[r] = global column of
   that entry.  The diagonal block is handled by MatGetRowMinAbs on mat->A; the
   off-diagonal block is scanned by hand so implicit zeros can participate. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* local-to-global map for B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: every row consists solely of implicit zeros */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it contains an implicit 0.0, which already has the smallest possible magnitude */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these comparisons mix the global column `col` with the local
         position `j` (and `j+n`); confirm the intended hole-detection semantics */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty off-diagonal row: the first off-process column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this row, keeping the smaller magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties prefer the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2342 
/* v[r] = minimum entry of row r (comparisons use PetscRealPart), where implicit
   (unstored) zeros in the off-diagonal block count as 0.0; if idx is non-NULL,
   idx[r] = global column of that entry. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* local-to-global map for B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity of min (PETSC_MAX_REAL) and no column */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it contains an implicit 0.0, so the row minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these comparisons mix the global column `col` with the local
         position `j` (and `j+n`); confirm the intended hole-detection semantics */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty off-diagonal row: the first off-process column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this row, keeping the smaller value */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties prefer the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2451 
/* v[r] = maximum entry of row r (comparisons use PetscRealPart), where implicit
   (unstored) zeros in the off-diagonal block count as 0.0; if idx is non-NULL,
   idx[r] = global column of that entry. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* local-to-global map for B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity of max (PETSC_MIN_REAL) and no column */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it contains an implicit 0.0, so the row maximum is at least 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these comparisons mix the global column `col` with the local
         position `j` (and `j+n`); confirm the intended hole-detection semantics */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty off-diagonal row: the first off-process column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this row, keeping the larger value */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties prefer the smaller global column */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2560 
2561 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2562 {
2563   PetscErrorCode ierr;
2564   Mat            *dummy;
2565 
2566   PetscFunctionBegin;
2567   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2568   *newmat = *dummy;
2569   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2574 {
2575   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2576   PetscErrorCode ierr;
2577 
2578   PetscFunctionBegin;
2579   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2580   A->factorerrortype = a->A->factorerrortype;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2585 {
2586   PetscErrorCode ierr;
2587   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2588 
2589   PetscFunctionBegin;
2590   PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2591   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2592   if (x->assembled) {
2593     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2594   } else {
2595     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2596   }
2597   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2598   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2603 {
2604   PetscFunctionBegin;
2605   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2606   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 /*@
2611    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2612 
2613    Collective on Mat
2614 
2615    Input Parameters:
2616 +    A - the matrix
2617 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2618 
   Level: advanced
2620 
2621 @*/
2622 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2623 {
2624   PetscErrorCode       ierr;
2625 
2626   PetscFunctionBegin;
2627   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2632 {
2633   PetscErrorCode       ierr;
2634   PetscBool            sc = PETSC_FALSE,flg;
2635 
2636   PetscFunctionBegin;
2637   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2638   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2639   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2640   if (flg) {
2641     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2642   }
2643   ierr = PetscOptionsTail();CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2648 {
2649   PetscErrorCode ierr;
2650   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2651   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2652 
2653   PetscFunctionBegin;
2654   if (!Y->preallocated) {
2655     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2656   } else if (!aij->nz) {
2657     PetscInt nonew = aij->nonew;
2658     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2659     aij->nonew = nonew;
2660   }
2661   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2666 {
2667   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2668   PetscErrorCode ierr;
2669 
2670   PetscFunctionBegin;
2671   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2672   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2673   if (d) {
2674     PetscInt rstart;
2675     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2676     *d += rstart;
2677 
2678   }
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2683 {
2684   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2685   PetscErrorCode ierr;
2686 
2687   PetscFunctionBegin;
2688   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 /* -------------------------------------------------------------------*/
2693 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2694                                        MatGetRow_MPIAIJ,
2695                                        MatRestoreRow_MPIAIJ,
2696                                        MatMult_MPIAIJ,
2697                                 /* 4*/ MatMultAdd_MPIAIJ,
2698                                        MatMultTranspose_MPIAIJ,
2699                                        MatMultTransposeAdd_MPIAIJ,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                 /*10*/ NULL,
2704                                        NULL,
2705                                        NULL,
2706                                        MatSOR_MPIAIJ,
2707                                        MatTranspose_MPIAIJ,
2708                                 /*15*/ MatGetInfo_MPIAIJ,
2709                                        MatEqual_MPIAIJ,
2710                                        MatGetDiagonal_MPIAIJ,
2711                                        MatDiagonalScale_MPIAIJ,
2712                                        MatNorm_MPIAIJ,
2713                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2714                                        MatAssemblyEnd_MPIAIJ,
2715                                        MatSetOption_MPIAIJ,
2716                                        MatZeroEntries_MPIAIJ,
2717                                 /*24*/ MatZeroRows_MPIAIJ,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                 /*29*/ MatSetUp_MPIAIJ,
2723                                        NULL,
2724                                        NULL,
2725                                        MatGetDiagonalBlock_MPIAIJ,
2726                                        NULL,
2727                                 /*34*/ MatDuplicate_MPIAIJ,
2728                                        NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        NULL,
2732                                 /*39*/ MatAXPY_MPIAIJ,
2733                                        MatCreateSubMatrices_MPIAIJ,
2734                                        MatIncreaseOverlap_MPIAIJ,
2735                                        MatGetValues_MPIAIJ,
2736                                        MatCopy_MPIAIJ,
2737                                 /*44*/ MatGetRowMax_MPIAIJ,
2738                                        MatScale_MPIAIJ,
2739                                        MatShift_MPIAIJ,
2740                                        MatDiagonalSet_MPIAIJ,
2741                                        MatZeroRowsColumns_MPIAIJ,
2742                                 /*49*/ MatSetRandom_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2748                                        NULL,
2749                                        MatSetUnfactored_MPIAIJ,
2750                                        MatPermute_MPIAIJ,
2751                                        NULL,
2752                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2753                                        MatDestroy_MPIAIJ,
2754                                        MatView_MPIAIJ,
2755                                        NULL,
2756                                        NULL,
2757                                 /*64*/ NULL,
2758                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2763                                        MatGetRowMinAbs_MPIAIJ,
2764                                        NULL,
2765                                        NULL,
2766                                        NULL,
2767                                        NULL,
2768                                 /*75*/ MatFDColoringApply_AIJ,
2769                                        MatSetFromOptions_MPIAIJ,
2770                                        NULL,
2771                                        NULL,
2772                                        MatFindZeroDiagonals_MPIAIJ,
2773                                 /*80*/ NULL,
2774                                        NULL,
2775                                        NULL,
2776                                 /*83*/ MatLoad_MPIAIJ,
2777                                        MatIsSymmetric_MPIAIJ,
2778                                        NULL,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                 /*89*/ NULL,
2783                                        NULL,
2784                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatBindToCPU_MPIAIJ,
2792                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2793                                        NULL,
2794                                        NULL,
2795                                        MatConjugate_MPIAIJ,
2796                                        NULL,
2797                                 /*104*/MatSetValuesRow_MPIAIJ,
2798                                        MatRealPart_MPIAIJ,
2799                                        MatImaginaryPart_MPIAIJ,
2800                                        NULL,
2801                                        NULL,
2802                                 /*109*/NULL,
2803                                        NULL,
2804                                        MatGetRowMin_MPIAIJ,
2805                                        NULL,
2806                                        MatMissingDiagonal_MPIAIJ,
2807                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2808                                        NULL,
2809                                        MatGetGhosts_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        MatGetMultiProcBlock_MPIAIJ,
2817                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2818                                        MatGetColumnReductions_MPIAIJ,
2819                                        MatInvertBlockDiagonal_MPIAIJ,
2820                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2821                                        MatCreateSubMatricesMPI_MPIAIJ,
2822                                 /*129*/NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2826                                        NULL,
2827                                 /*134*/NULL,
2828                                        NULL,
2829                                        NULL,
2830                                        NULL,
2831                                        NULL,
2832                                 /*139*/MatSetBlockSizes_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        MatFDColoringSetUp_MPIXAIJ,
2836                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2837                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2838                                 /*145*/NULL,
2839                                        NULL,
2840                                        NULL
2841 };
2842 
2843 /* ----------------------------------------------------------------------------------------*/
2844 
2845 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2846 {
2847   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2848   PetscErrorCode ierr;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2852   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2857 {
2858   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2859   PetscErrorCode ierr;
2860 
2861   PetscFunctionBegin;
2862   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2863   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2864   PetscFunctionReturn(0);
2865 }
2866 
/*
   MatMPIAIJSetPreallocation_MPIAIJ - implementation of MatMPIAIJSetPreallocation()
   for the MPIAIJ format.

   Sets up the row/column layouts, discards any cached off-process data, recreates
   the off-diagonal block B (its size may have changed), creates the diagonal
   block A on first use, and preallocates both sequential blocks.

   Input Parameters:
+  B     - the MPIAIJ matrix
.  d_nz  - nonzeros per row of the diagonal block (used when d_nnz is NULL)
.  d_nnz - per-row nonzero counts for the diagonal block, or NULL
.  o_nz  - nonzeros per row of the off-diagonal block (used when o_nnz is NULL)
-  o_nnz - per-row nonzero counts for the off-diagonal block, or NULL
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Layouts must be finalized before the local/global sizes below can be used */
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* Discard the cached global-to-local column map, ghost-column list, local work
     vector and scatter; these are rebuilt during assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* With a single process the off-diagonal block has no columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* The diagonal block never changes shape; create it only on first preallocation */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* Matrix must be (re)assembled before use */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2911 
/*
   MatResetPreallocation_MPIAIJ - return the matrix to its freshly-preallocated
   state: the existing preallocation of the diagonal (A) and off-diagonal (B)
   blocks is kept, but cached off-process data and assembled state are discarded.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* Discard the cached global-to-local column map, ghost-column list, local work
     vector and scatter; these are rebuilt during the next assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Reset the two sequential blocks; their preallocated structure is retained */
  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  /* Matrix must be assembled again before it can be used */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2939 
/*
   MatDuplicate_MPIAIJ - create a new MPIAIJ matrix with the same layout and type
   as matin, copying its numerical values if cpvalues requests it.

   Note: matin need only be preallocated, not assembled (see the comment below
   about DMCreateMatrix_Shell).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  /* Create the shell of the new matrix: same sizes, block sizes and type */
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* Copy state flags; value insertion starts from a clean slate */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work space is not copied; it is allocated on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* Layouts are shared (reference counted), not copied */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* Deep-copy the global-to-local column map of the off-diagonal block, if any */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  /* Deep-copy the ghost (off-process) column list, if any */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  /* Duplicate the sequential diagonal (A) and off-diagonal (B) blocks */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  /* Copy the list of composed functions so e.g. queried methods keep working */
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3006 
3007 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3008 {
3009   PetscBool      isbinary, ishdf5;
3010   PetscErrorCode ierr;
3011 
3012   PetscFunctionBegin;
3013   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3014   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3015   /* force binary viewer to load .info file if it has not yet done so */
3016   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3017   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3018   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3019   if (isbinary) {
3020     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3021   } else if (ishdf5) {
3022 #if defined(PETSC_HAVE_HDF5)
3023     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3024 #else
3025     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3026 #endif
3027   } else {
3028     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3029   }
3030   PetscFunctionReturn(0);
3031 }
3032 
/*
   MatLoad_MPIAIJ_Binary - load an MPIAIJ matrix from a PETSc binary viewer.

   File format: a 4-entry header (MAT_FILE_CLASSID, M, N, nz), followed by the
   per-row nonzero counts, the column indices, and finally the numerical values.
   All reads of the bulk data are collective (PetscViewerBinaryReadAll).
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* nz < 0 flags a special on-disk format that MPIAIJ cannot read */
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  /* prefix-sum converts per-row lengths into CSR row offsets, in place */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the row lengths must account for every stored nonzero */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3080 
3081 /* Not scalable because of ISAllGather() unless getting all columns. */
/*
   ISGetSeqIS_Private - gather the parallel column IS iscol into a sequential IS
   on every process.  When every process selects exactly its own column range,
   the ISAllGather() is skipped and the identity stride 0..N-1 is built locally.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt  start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* NOTE(review): despite its name, mlen is the END of the column ownership
       range; the test checks iscol is exactly this process's column range */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* the fast path requires every process to qualify (MPI_MIN over 0/1 flags) */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  if (gisstride) {
    /* all columns are selected, so the gathered IS is simply the identity 0..N-1 */
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    /* general (non-scalable) path: gather all indices, preserving block size */
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}
3118 
3119 /*
3120  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3121  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3122 
3123  Input Parameters:
3124    mat - matrix
   isrow - parallel row index set; its local indices are a subset of the local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3127    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3128            i.e., mat->cstart <= iscol[i] < mat->cend
3129  Output Parameter:
3130    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3131    iscol_o - sequential column index set for retrieving mat->B
3132    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3133  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* entries left at -1 mark columns NOT selected by iscol */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  /* isstart = global offset of this process's selected columns within iscol */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  /* mark each selected column in x with its own global index, and in cmap with
     its position within the submatrix; idx records the local positions */
  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d (ownership of idx transfers to the IS) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d: local row numbers of the selected rows */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec > -1 were marked as selected */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* cmap1 is returned to the caller, who is responsible for freeing it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3231 
3232 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    /* these index sets were composed onto submat by the MAT_INITIAL_MATRIX branch below */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M; M takes ownership of Asub, and Bsub is destroyed inside */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      /* keep only the iscol_o entries whose global column survived the condensing;
         both garray and subgarray are traversed in increasing order */
      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    /* garray was allocated by ISGetSeqIS_SameColDist_Private(); free it here */
    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* PetscObjectCompose takes a reference, so we can drop ours immediately */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3327 
/*
   MatCreateSubMatrix_MPIAIJ - extract the parallel submatrix mat[isrow,iscol].

   Dispatches to progressively cheaper implementations:
   - both isrow and iscol match mat's row/column distribution -> SameRowColDist
   - only isrow matches (and the gathered iscol is sorted)    -> SameRowDist
   - otherwise the general, non-scalable path that gathers iscol globally.
   On reuse, the previously composed objects on *newmat select the same path.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which objects were composed on *newmat tells us which path created it */
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices must fall within this process's row ownership range */
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the fast paths require the property to hold on EVERY process */
    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
        /* if unsorted, fall through to the general path below (iscol_local is reused there) */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered IS on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3432 
3433 /*@C
     MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.
3436 
3437    Collective
3438 
3439    Input Parameters:
3440 +  comm - MPI communicator
3441 .  A - "diagonal" portion of matrix
3442 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3443 -  garray - global index of B columns
3444 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3448 
3449    Notes:
3450        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3451        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3452 
3453 .seealso: MatCreateMPIAIJWithSplitArrays()
3454 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);

  ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Set A as diagonal portion of *mat (ownership transfers; caller must not use A) */
  maij->A = A;

  /* Convert B's column indices in place from B-local numbering to global
     numbering using garray; required before B's arrays can serve as the
     off-diagonal block of *mat */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* Transfer ownership of the shared arrays from B to Bnew, then destroy B:
     B must not free them ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  ierr = MatDestroy(&B);CHKERRQ(ierr);

  /* ... and Bnew frees them when it is destroyed */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3522 
3523 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3524 
3525 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3526 {
3527   PetscErrorCode ierr;
3528   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3529   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3530   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3531   Mat            M,Msub,B=a->B;
3532   MatScalar      *aa;
3533   Mat_SeqAIJ     *aij;
3534   PetscInt       *garray = a->garray,*colsub,Ncols;
3535   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3536   IS             iscol_sub,iscmap;
3537   const PetscInt *is_idx,*cmap;
3538   PetscBool      allcolumns=PETSC_FALSE;
3539   MPI_Comm       comm;
3540 
3541   PetscFunctionBegin;
3542   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3543   if (call == MAT_REUSE_MATRIX) {
3544     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3545     PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3546     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3547 
3548     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3549     PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3550 
3551     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3552     PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3553 
3554     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3555 
3556   } else { /* call == MAT_INITIAL_MATRIX) */
3557     PetscBool flg;
3558 
3559     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3560     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3561 
3562     /* (1) iscol -> nonscalable iscol_local */
3563     /* Check for special case: each processor gets entire matrix columns */
3564     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3565     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3566     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3567     if (allcolumns) {
3568       iscol_sub = iscol_local;
3569       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3570       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3571 
3572     } else {
3573       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3574       PetscInt *idx,*cmap1,k;
3575       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3576       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3577       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3578       count = 0;
3579       k     = 0;
3580       for (i=0; i<Ncols; i++) {
3581         j = is_idx[i];
3582         if (j >= cstart && j < cend) {
3583           /* diagonal part of mat */
3584           idx[count]     = j;
3585           cmap1[count++] = i; /* column index in submat */
3586         } else if (Bn) {
3587           /* off-diagonal part of mat */
3588           if (j == garray[k]) {
3589             idx[count]     = j;
3590             cmap1[count++] = i;  /* column index in submat */
3591           } else if (j > garray[k]) {
3592             while (j > garray[k] && k < Bn-1) k++;
3593             if (j == garray[k]) {
3594               idx[count]     = j;
3595               cmap1[count++] = i; /* column index in submat */
3596             }
3597           }
3598         }
3599       }
3600       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3601 
3602       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3603       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3604       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3605 
3606       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3607     }
3608 
3609     /* (3) Create sequential Msub */
3610     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3611   }
3612 
3613   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3614   aij  = (Mat_SeqAIJ*)(Msub)->data;
3615   ii   = aij->i;
3616   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3617 
3618   /*
3619       m - number of local rows
3620       Ncols - number of columns (same on all processors)
3621       rstart - first row in new global matrix generated
3622   */
3623   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3624 
3625   if (call == MAT_INITIAL_MATRIX) {
3626     /* (4) Create parallel newmat */
3627     PetscMPIInt    rank,size;
3628     PetscInt       csize;
3629 
3630     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3631     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3632 
3633     /*
3634         Determine the number of non-zeros in the diagonal and off-diagonal
3635         portions of the matrix in order to do correct preallocation
3636     */
3637 
3638     /* first get start and end of "diagonal" columns */
3639     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3640     if (csize == PETSC_DECIDE) {
3641       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3642       if (mglobal == Ncols) { /* square matrix */
3643         nlocal = m;
3644       } else {
3645         nlocal = Ncols/size + ((Ncols % size) > rank);
3646       }
3647     } else {
3648       nlocal = csize;
3649     }
3650     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3651     rstart = rend - nlocal;
3652     PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3653 
3654     /* next, compute all the lengths */
3655     jj    = aij->j;
3656     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3657     olens = dlens + m;
3658     for (i=0; i<m; i++) {
3659       jend = ii[i+1] - ii[i];
3660       olen = 0;
3661       dlen = 0;
3662       for (j=0; j<jend; j++) {
3663         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3664         else dlen++;
3665         jj++;
3666       }
3667       olens[i] = olen;
3668       dlens[i] = dlen;
3669     }
3670 
3671     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3672     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3673 
3674     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3675     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3676     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3677     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3678     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3679     ierr = PetscFree(dlens);CHKERRQ(ierr);
3680 
3681   } else { /* call == MAT_REUSE_MATRIX */
3682     M    = *newmat;
3683     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3684     PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3685     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3686     /*
3687          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3688        rather than the slower MatSetValues().
3689     */
3690     M->was_assembled = PETSC_TRUE;
3691     M->assembled     = PETSC_FALSE;
3692   }
3693 
3694   /* (5) Set values of Msub to *newmat */
3695   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3696   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3697 
3698   jj   = aij->j;
3699   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3700   for (i=0; i<m; i++) {
3701     row = rstart + i;
3702     nz  = ii[i+1] - ii[i];
3703     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3704     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3705     jj += nz; aa += nz;
3706   }
3707   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3708   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3709 
3710   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712 
3713   ierr = PetscFree(colsub);CHKERRQ(ierr);
3714 
3715   /* save Msub, iscol_sub and iscmap used in processor for next request */
3716   if (call == MAT_INITIAL_MATRIX) {
3717     *newmat = M;
3718     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3719     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3720 
3721     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3722     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3723 
3724     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3725     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3726 
3727     if (iscol_local) {
3728       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3729       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3730     }
3731   }
3732   PetscFunctionReturn(0);
3733 }
3734 
/*
    Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
  on each process, and then the end result obtained by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
3742 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3743 {
3744   PetscErrorCode ierr;
3745   PetscMPIInt    rank,size;
3746   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3747   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3748   Mat            M,Mreuse;
3749   MatScalar      *aa,*vwork;
3750   MPI_Comm       comm;
3751   Mat_SeqAIJ     *aij;
3752   PetscBool      colflag,allcolumns=PETSC_FALSE;
3753 
3754   PetscFunctionBegin;
3755   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3756   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3757   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3758 
3759   /* Check for special case: each processor gets entire matrix columns */
3760   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3761   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3762   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3763   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3764 
3765   if (call ==  MAT_REUSE_MATRIX) {
3766     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3767     PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3768     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3769   } else {
3770     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3771   }
3772 
3773   /*
3774       m - number of local rows
3775       n - number of columns (same on all processors)
3776       rstart - first row in new global matrix generated
3777   */
3778   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3779   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3780   if (call == MAT_INITIAL_MATRIX) {
3781     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3782     ii  = aij->i;
3783     jj  = aij->j;
3784 
3785     /*
3786         Determine the number of non-zeros in the diagonal and off-diagonal
3787         portions of the matrix in order to do correct preallocation
3788     */
3789 
3790     /* first get start and end of "diagonal" columns */
3791     if (csize == PETSC_DECIDE) {
3792       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3793       if (mglobal == n) { /* square matrix */
3794         nlocal = m;
3795       } else {
3796         nlocal = n/size + ((n % size) > rank);
3797       }
3798     } else {
3799       nlocal = csize;
3800     }
3801     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3802     rstart = rend - nlocal;
3803     PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3804 
3805     /* next, compute all the lengths */
3806     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3807     olens = dlens + m;
3808     for (i=0; i<m; i++) {
3809       jend = ii[i+1] - ii[i];
3810       olen = 0;
3811       dlen = 0;
3812       for (j=0; j<jend; j++) {
3813         if (*jj < rstart || *jj >= rend) olen++;
3814         else dlen++;
3815         jj++;
3816       }
3817       olens[i] = olen;
3818       dlens[i] = dlen;
3819     }
3820     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3821     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3822     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3823     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3824     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3825     ierr = PetscFree(dlens);CHKERRQ(ierr);
3826   } else {
3827     PetscInt ml,nl;
3828 
3829     M    = *newmat;
3830     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3831     PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3832     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3833     /*
3834          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3835        rather than the slower MatSetValues().
3836     */
3837     M->was_assembled = PETSC_TRUE;
3838     M->assembled     = PETSC_FALSE;
3839   }
3840   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3841   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3842   ii   = aij->i;
3843   jj   = aij->j;
3844 
3845   /* trigger copy to CPU if needed */
3846   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3847   for (i=0; i<m; i++) {
3848     row   = rstart + i;
3849     nz    = ii[i+1] - ii[i];
3850     cwork = jj; jj += nz;
3851     vwork = aa; aa += nz;
3852     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3853   }
3854   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3855 
3856   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3857   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3858   *newmat = M;
3859 
3860   /* save submatrix used in processor for next request */
3861   if (call ==  MAT_INITIAL_MATRIX) {
3862     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3863     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3864   }
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3869 {
3870   PetscInt       m,cstart, cend,j,nnz,i,d;
3871   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3872   const PetscInt *JJ;
3873   PetscErrorCode ierr;
3874   PetscBool      nooffprocentries;
3875 
3876   PetscFunctionBegin;
3877   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3878 
3879   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3880   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3881   m      = B->rmap->n;
3882   cstart = B->cmap->rstart;
3883   cend   = B->cmap->rend;
3884   rstart = B->rmap->rstart;
3885 
3886   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3887 
3888   if (PetscDefined(USE_DEBUG)) {
3889     for (i=0; i<m; i++) {
3890       nnz = Ii[i+1]- Ii[i];
3891       JJ  = J + Ii[i];
3892       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3893       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3894       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3895     }
3896   }
3897 
3898   for (i=0; i<m; i++) {
3899     nnz     = Ii[i+1]- Ii[i];
3900     JJ      = J + Ii[i];
3901     nnz_max = PetscMax(nnz_max,nnz);
3902     d       = 0;
3903     for (j=0; j<nnz; j++) {
3904       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3905     }
3906     d_nnz[i] = d;
3907     o_nnz[i] = nnz - d;
3908   }
3909   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3910   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3911 
3912   for (i=0; i<m; i++) {
3913     ii   = i + rstart;
3914     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3915   }
3916   nooffprocentries    = B->nooffprocentries;
3917   B->nooffprocentries = PETSC_TRUE;
3918   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3920   B->nooffprocentries = nooffprocentries;
3921 
3922   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 /*@
3927    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3928    (the default parallel PETSc format).
3929 
3930    Collective
3931 
3932    Input Parameters:
3933 +  B - the matrix
3934 .  i - the indices into j for the start of each local row (starts with zero)
3935 .  j - the column indices for each local row (starts with zero)
3936 -  v - optional values in the matrix
3937 
3938    Level: developer
3939 
3940    Notes:
3941        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3942      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3943      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3944 
3945        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the expected input
    data is as shown

3950 
3951 $        1 0 0
3952 $        2 0 3     P0
3953 $       -------
3954 $        4 5 6     P1
3955 $
3956 $     Process0 [P0]: rows_owned=[0,1]
3957 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3958 $        j =  {0,0,2}  [size = 3]
3959 $        v =  {1,2,3}  [size = 3]
3960 $
3961 $     Process1 [P1]: rows_owned=[2]
3962 $        i =  {0,3}    [size = nrow+1  = 1+1]
3963 $        j =  {0,1,2}  [size = 3]
3964 $        v =  {4,5,6}  [size = 3]
3965 
3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3967           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3968 @*/
3969 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3970 {
3971   PetscErrorCode ierr;
3972 
3973   PetscFunctionBegin;
3974   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 /*@C
3979    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3980    (the default parallel PETSc format).  For good matrix assembly performance
3981    the user should preallocate the matrix storage by setting the parameters
3982    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3983    performance can be increased by more than a factor of 50.
3984 
3985    Collective
3986 
3987    Input Parameters:
3988 +  B - the matrix
3989 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3990            (same value is used for all local rows)
3991 .  d_nnz - array containing the number of nonzeros in the various rows of the
3992            DIAGONAL portion of the local submatrix (possibly different for each row)
3993            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3994            The size of this array is equal to the number of local rows, i.e 'm'.
3995            For matrices that will be factored, you must leave room for (and set)
3996            the diagonal entry even if it is zero.
3997 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998            submatrix (same value is used for all local rows).
3999 -  o_nnz - array containing the number of nonzeros in the various rows of the
4000            OFF-DIAGONAL portion of the local submatrix (possibly different for
4001            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4002            structure. The size of this array is equal to the number
4003            of local rows, i.e 'm'.
4004 
4005    If the *_nnz parameter is given then the *_nz parameter is ignored
4006 
4007    The AIJ format (also called the Yale sparse matrix format or
4008    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4009    storage.  The stored row and column indices begin with zero.
4010    See Users-Manual: ch_mat for details.
4011 
4012    The parallel matrix is partitioned such that the first m0 rows belong to
4013    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4014    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4015 
4016    The DIAGONAL portion of the local submatrix of a processor can be defined
4017    as the submatrix which is obtained by extraction the part corresponding to
4018    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4019    first row that belongs to the processor, r2 is the last row belonging to
4020    the this processor, and c1-c2 is range of indices of the local part of a
4021    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4022    common case of a square matrix, the row and column ranges are the same and
4023    the DIAGONAL part is also square. The remaining portion of the local
4024    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4025 
4026    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4027 
4028    You can call MatGetInfo() to get information on how effective the preallocation was;
4029    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4030    You can also run with the option -info and look for messages with the string
4031    malloc in them to see if additional memory allocation was needed.
4032 
4033    Example usage:
4034 
4035    Consider the following 8x8 matrix with 34 non-zero values, that is
4036    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4037    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4038    as follows:
4039 
4040 .vb
4041             1  2  0  |  0  3  0  |  0  4
4042     Proc0   0  5  6  |  7  0  0  |  8  0
4043             9  0 10  | 11  0  0  | 12  0
4044     -------------------------------------
4045            13  0 14  | 15 16 17  |  0  0
4046     Proc1   0 18  0  | 19 20 21  |  0  0
4047             0  0  0  | 22 23  0  | 24  0
4048     -------------------------------------
4049     Proc2  25 26 27  |  0  0 28  | 29  0
4050            30  0  0  | 31 32 33  |  0 34
4051 .ve
4052 
4053    This can be represented as a collection of submatrices as:
4054 
4055 .vb
4056       A B C
4057       D E F
4058       G H I
4059 .ve
4060 
4061    Where the submatrices A,B,C are owned by proc0, D,E,F are
4062    owned by proc1, G,H,I are owned by proc2.
4063 
4064    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4065    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4066    The 'M','N' parameters are 8,8, and have the same values on all procs.
4067 
4068    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4069    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4070    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4074 
4075    When d_nz, o_nz parameters are specified, d_nz storage elements are
4076    allocated for every row of the local diagonal submatrix, and o_nz
4077    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4078    One way to choose d_nz and o_nz is to use the max nonzerors per local
4079    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4080    In this case, the values of d_nz,o_nz are:
4081 .vb
4082      proc0 : dnz = 2, o_nz = 2
4083      proc1 : dnz = 3, o_nz = 2
4084      proc2 : dnz = 1, o_nz = 4
4085 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4090 
4091    When d_nnz, o_nnz parameters are specified, the storage is specified
4092    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4093    In the above case the values for d_nnz,o_nnz are:
4094 .vb
4095      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4096      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4097      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4098 .ve
4099    Here the space allocated is sum of all the above values i.e 34, and
4100    hence pre-allocation is perfect.
4101 
4102    Level: intermediate
4103 
4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4105           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4106 @*/
4107 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4108 {
4109   PetscErrorCode ierr;
4110 
4111   PetscFunctionBegin;
4112   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4113   PetscValidType(B,1);
4114   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4115   PetscFunctionReturn(0);
4116 }
4117 
/*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format.

4121 
4122    Collective
4123 
4124    Input Parameters:
4125 +  comm - MPI communicator
4126 .  m - number of local rows (Cannot be PETSC_DECIDE)
4127 .  n - This value should be the same as the local size used in creating the
4128        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4129        calculated if N is given) For square matrices n is almost always m.
4130 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4131 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4132 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4133 .   j - column indices
4134 -   a - matrix values
4135 
4136    Output Parameter:
4137 .   mat - the matrix
4138 
4139    Level: intermediate
4140 
4141    Notes:
4142        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4143      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4144      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4145 
4146        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the expected input
    data is as shown

4151 
4152        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4153 
4154 $        1 0 0
4155 $        2 0 3     P0
4156 $       -------
4157 $        4 5 6     P1
4158 $
4159 $     Process0 [P0]: rows_owned=[0,1]
4160 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4161 $        j =  {0,0,2}  [size = 3]
4162 $        v =  {1,2,3}  [size = 3]
4163 $
4164 $     Process1 [P1]: rows_owned=[2]
4165 $        i =  {0,3}    [size = nrow+1  = 1+1]
4166 $        j =  {0,1,2}  [size = 3]
4167 $        v =  {4,5,6}  [size = 3]
4168 
4169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4170           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4171 @*/
4172 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4173 {
4174   PetscErrorCode ierr;
4175 
4176   PetscFunctionBegin;
4177   PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4178   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4179   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4180   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4181   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4182   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4183   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4184   PetscFunctionReturn(0);
4185 }
4186 
4187 /*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the standard
         CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical
4190 
4191    Collective
4192 
4193    Input Parameters:
4194 +  mat - the matrix
4195 .  m - number of local rows (Cannot be PETSC_DECIDE)
4196 .  n - This value should be the same as the local size used in creating the
4197        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4198        calculated if N is given) For square matrices n is almost always m.
4199 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4200 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4201 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4202 .  J - column indices
4203 -  v - matrix values
4204 
4205    Level: intermediate
4206 
4207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4208           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4209 @*/
4210 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4211 {
4212   PetscErrorCode ierr;
4213   PetscInt       cstart,nnz,i,j;
4214   PetscInt       *ld;
4215   PetscBool      nooffprocentries;
4216   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4217   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4218   PetscScalar    *ad,*ao;
4219   const PetscInt *Adi = Ad->i;
4220   PetscInt       ldi,Iii,md;
4221 
4222   PetscFunctionBegin;
4223   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4224   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4225   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4226   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4227 
4228   ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4229   ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4230   cstart = mat->cmap->rstart;
4231   if (!Aij->ld) {
4232     /* count number of entries below block diagonal */
4233     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4234     Aij->ld = ld;
4235     for (i=0; i<m; i++) {
4236       nnz  = Ii[i+1]- Ii[i];
4237       j     = 0;
4238       while  (J[j] < cstart && j < nnz) {j++;}
4239       J    += nnz;
4240       ld[i] = j;
4241     }
4242   } else {
4243     ld = Aij->ld;
4244   }
4245 
4246   for (i=0; i<m; i++) {
4247     nnz  = Ii[i+1]- Ii[i];
4248     Iii  = Ii[i];
4249     ldi  = ld[i];
4250     md   = Adi[i+1]-Adi[i];
4251     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4252     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4253     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4254     ad  += md;
4255     ao  += nnz - md;
4256   }
4257   nooffprocentries      = mat->nooffprocentries;
4258   mat->nooffprocentries = PETSC_TRUE;
4259   ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4260   ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4261   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4262   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4263   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4264   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4265   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4266   mat->nooffprocentries = nooffprocentries;
4267   PetscFunctionReturn(0);
4268 }
4269 
4270 /*@C
4271    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4272    (the default parallel PETSc format).  For good matrix assembly performance
4273    the user should preallocate the matrix storage by setting the parameters
4274    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4275    performance can be increased by more than a factor of 50.
4276 
4277    Collective
4278 
4279    Input Parameters:
4280 +  comm - MPI communicator
4281 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4282            This value should be the same as the local size used in creating the
4283            y vector for the matrix-vector product y = Ax.
4284 .  n - This value should be the same as the local size used in creating the
4285        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4286        calculated if N is given) For square matrices n is almost always m.
4287 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4288 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4289 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4290            (same value is used for all local rows)
4291 .  d_nnz - array containing the number of nonzeros in the various rows of the
4292            DIAGONAL portion of the local submatrix (possibly different for each row)
4293            or NULL, if d_nz is used to specify the nonzero structure.
4294            The size of this array is equal to the number of local rows, i.e 'm'.
4295 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4296            submatrix (same value is used for all local rows).
4297 -  o_nnz - array containing the number of nonzeros in the various rows of the
4298            OFF-DIAGONAL portion of the local submatrix (possibly different for
4299            each row) or NULL, if o_nz is used to specify the nonzero
4300            structure. The size of this array is equal to the number
4301            of local rows, i.e 'm'.
4302 
4303    Output Parameter:
4304 .  A - the matrix
4305 
4306    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4307    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4308    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4309 
4310    Notes:
4311    If the *_nnz parameter is given then the *_nz parameter is ignored
4312 
4313    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4314    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4315    storage requirements for this matrix.
4316 
4317    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4318    processor than it must be used on all processors that share the object for
4319    that argument.
4320 
4321    The user MUST specify either the local or global matrix dimensions
4322    (possibly both).
4323 
4324    The parallel matrix is partitioned across processors such that the
4325    first m0 rows belong to process 0, the next m1 rows belong to
4326    process 1, the next m2 rows belong to process 2 etc.. where
4327    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4328    values corresponding to [m x N] submatrix.
4329 
4330    The columns are logically partitioned with the n0 columns belonging
4331    to 0th partition, the next n1 columns belonging to the next
4332    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4333 
4334    The DIAGONAL portion of the local submatrix on any given processor
4335    is the submatrix corresponding to the rows and columns m,n
4336    corresponding to the given processor. i.e diagonal matrix on
4337    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4338    etc. The remaining portion of the local submatrix [m x (N-n)]
4339    constitute the OFF-DIAGONAL portion. The example below better
4340    illustrates this concept.
4341 
4342    For a square global matrix we define each processor's diagonal portion
4343    to be its local rows and the corresponding columns (a square submatrix);
4344    each processor's off-diagonal portion encompasses the remainder of the
4345    local matrix (a rectangular submatrix).
4346 
4347    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4348 
4349    When calling this routine with a single process communicator, a matrix of
4350    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4351    type of communicator, use the construction mechanism
4352 .vb
4353      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4354 .ve
4355 
4356 $     MatCreate(...,&A);
4357 $     MatSetType(A,MATMPIAIJ);
4358 $     MatSetSizes(A, m,n,M,N);
4359 $     MatMPIAIJSetPreallocation(A,...);
4360 
4361    By default, this format uses inodes (identical nodes) when possible.
4362    We search for consecutive rows with the same nonzero structure, thereby
4363    reusing matrix information to achieve increased efficiency.
4364 
4365    Options Database Keys:
4366 +  -mat_no_inode  - Do not use inodes
4367 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4368 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4370         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4371 
4372    Example usage:
4373 
4374    Consider the following 8x8 matrix with 34 non-zero values, that is
4375    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4376    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4377    as follows
4378 
4379 .vb
4380             1  2  0  |  0  3  0  |  0  4
4381     Proc0   0  5  6  |  7  0  0  |  8  0
4382             9  0 10  | 11  0  0  | 12  0
4383     -------------------------------------
4384            13  0 14  | 15 16 17  |  0  0
4385     Proc1   0 18  0  | 19 20 21  |  0  0
4386             0  0  0  | 22 23  0  | 24  0
4387     -------------------------------------
4388     Proc2  25 26 27  |  0  0 28  | 29  0
4389            30  0  0  | 31 32 33  |  0 34
4390 .ve
4391 
4392    This can be represented as a collection of submatrices as
4393 
4394 .vb
4395       A B C
4396       D E F
4397       G H I
4398 .ve
4399 
4400    Where the submatrices A,B,C are owned by proc0, D,E,F are
4401    owned by proc1, G,H,I are owned by proc2.
4402 
4403    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4404    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4405    The 'M','N' parameters are 8,8, and have the same values on all procs.
4406 
4407    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4408    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4409    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4410    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4411    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4413 
4414    When d_nz, o_nz parameters are specified, d_nz storage elements are
4415    allocated for every row of the local diagonal submatrix, and o_nz
4416    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4418    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4419    In this case, the values of d_nz,o_nz are
4420 .vb
4421      proc0 : dnz = 2, o_nz = 2
4422      proc1 : dnz = 3, o_nz = 2
4423      proc2 : dnz = 1, o_nz = 4
4424 .ve
4425    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4426    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4428    34 values.
4429 
4430    When d_nnz, o_nnz parameters are specified, the storage is specified
4431    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4432    In the above case the values for d_nnz,o_nnz are
4433 .vb
4434      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4435      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4436      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4437 .ve
4438    Here the space allocated is sum of all the above values i.e 34, and
4439    hence pre-allocation is perfect.
4440 
4441    Level: intermediate
4442 
4443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4444           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4445 @*/
4446 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4447 {
4448   PetscErrorCode ierr;
4449   PetscMPIInt    size;
4450 
4451   PetscFunctionBegin;
4452   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4453   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4454   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4455   if (size > 1) {
4456     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4457     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4458   } else {
4459     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4460     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4461   }
4462   PetscFunctionReturn(0);
4463 }
4464 
4465 /*@C
4466   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4467 
4468   Not collective
4469 
4470   Input Parameter:
4471 . A - The MPIAIJ matrix
4472 
4473   Output Parameters:
4474 + Ad - The local diagonal block as a SeqAIJ matrix
4475 . Ao - The local off-diagonal block as a SeqAIJ matrix
4476 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4477 
4478   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4480   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4481   local column numbers to global column numbers in the original matrix.
4482 
4483   Level: intermediate
4484 
4485 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4486 @*/
4487 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4488 {
4489   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4490   PetscBool      flg;
4491   PetscErrorCode ierr;
4492 
4493   PetscFunctionBegin;
4494   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4495   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4496   if (Ad)     *Ad     = a->A;
4497   if (Ao)     *Ao     = a->B;
4498   if (colmap) *colmap = a->garray;
4499   PetscFunctionReturn(0);
4500 }
4501 
/* Concatenate the sequential matrices owned by the ranks of comm, row-wise, into one
   parallel matrix: rank r's rows of inmat become rows [sum(m_0..m_{r-1}), ...) of *outmat.
   n is the local column size of *outmat (or PETSC_DECIDE).  With MAT_INITIAL_MATRIX the
   matrix is created and preallocated; with MAT_REUSE_MATRIX only the values are inserted. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* exclusive prefix sum of local row counts gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* NOTE: MatPreallocateInitialize/Finalize are macros that allocate/free dnz and onz */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    /* use the root type of inmat so device subclasses are preserved */
    ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
    /* only one of these takes effect, depending on whether *outmat is Seq or MPI */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4556 
4557 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4558 {
4559   PetscErrorCode    ierr;
4560   PetscMPIInt       rank;
4561   PetscInt          m,N,i,rstart,nnz;
4562   size_t            len;
4563   const PetscInt    *indx;
4564   PetscViewer       out;
4565   char              *name;
4566   Mat               B;
4567   const PetscScalar *values;
4568 
4569   PetscFunctionBegin;
4570   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4571   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4572   /* Should this be the type of the diagonal block of A? */
4573   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4574   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4575   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4576   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4577   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4578   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4579   for (i=0; i<m; i++) {
4580     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4581     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4582     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4583   }
4584   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4585   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4586 
4587   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4588   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4589   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4590   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4591   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4592   ierr = PetscFree(name);CHKERRQ(ierr);
4593   ierr = MatView(B,out);CHKERRQ(ierr);
4594   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4595   ierr = MatDestroy(&B);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4600 {
4601   PetscErrorCode      ierr;
4602   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4603 
4604   PetscFunctionBegin;
4605   if (!merge) PetscFunctionReturn(0);
4606   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4607   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4608   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4609   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4610   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4611   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4612   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4613   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4614   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4615   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4616   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4617   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4618   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4619   ierr = PetscFree(merge);CHKERRQ(ierr);
4620   PetscFunctionReturn(0);
4621 }
4622 
4623 #include <../src/mat/utils/freespace.h>
4624 #include <petscbt.h>
4625 
/* Numeric phase of merging per-rank SeqAIJ matrices into one MPIAIJ matrix: ships the
   values of seqmat's non-locally-owned rows to their owning ranks and sums local plus
   received contributions into mpimat, whose structure (and the merge bookkeeping stored
   in the "MatMergeSeqsToMPI" container) was built by MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the merge bookkeeping created during the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr);
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the rows destined for proc are contiguous in seqmat, starting at its first owned row */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;  /* global row index of local row i */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: both aj and bj_i are sorted, and aj's columns are a subset of bj_i's */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4746 
4747 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4748 {
4749   PetscErrorCode      ierr;
4750   Mat                 B_mpi;
4751   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4752   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4753   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4754   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4755   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4756   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4757   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4758   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4759   MPI_Status          *status;
4760   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4761   PetscBT             lnkbt;
4762   Mat_Merge_SeqsToMPI *merge;
4763   PetscContainer      container;
4764 
4765   PetscFunctionBegin;
4766   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4767 
4768   /* make sure it is a PETSc comm */
4769   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4770   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4771   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4772 
4773   ierr = PetscNew(&merge);CHKERRQ(ierr);
4774   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4775 
4776   /* determine row ownership */
4777   /*---------------------------------------------------------*/
4778   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4779   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4780   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4781   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4782   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4783   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4784   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4785 
4786   m      = merge->rowmap->n;
4787   owners = merge->rowmap->range;
4788 
4789   /* determine the number of messages to send, their lengths */
4790   /*---------------------------------------------------------*/
4791   len_s = merge->len_s;
4792 
4793   len          = 0; /* length of buf_si[] */
4794   merge->nsend = 0;
4795   for (proc=0; proc<size; proc++) {
4796     len_si[proc] = 0;
4797     if (proc == rank) {
4798       len_s[proc] = 0;
4799     } else {
4800       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4801       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4802     }
4803     if (len_s[proc]) {
4804       merge->nsend++;
4805       nrows = 0;
4806       for (i=owners[proc]; i<owners[proc+1]; i++) {
4807         if (ai[i+1] > ai[i]) nrows++;
4808       }
4809       len_si[proc] = 2*(nrows+1);
4810       len         += len_si[proc];
4811     }
4812   }
4813 
4814   /* determine the number and length of messages to receive for ij-structure */
4815   /*-------------------------------------------------------------------------*/
4816   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4817   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4818 
4819   /* post the Irecv of j-structure */
4820   /*-------------------------------*/
4821   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4822   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4823 
4824   /* post the Isend of j-structure */
4825   /*--------------------------------*/
4826   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4827 
4828   for (proc=0, k=0; proc<size; proc++) {
4829     if (!len_s[proc]) continue;
4830     i    = owners[proc];
4831     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4832     k++;
4833   }
4834 
4835   /* receives and sends of j-structure are complete */
4836   /*------------------------------------------------*/
4837   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4838   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4839 
4840   /* send and recv i-structure */
4841   /*---------------------------*/
4842   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4843   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4844 
4845   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4846   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4847   for (proc=0,k=0; proc<size; proc++) {
4848     if (!len_s[proc]) continue;
4849     /* form outgoing message for i-structure:
4850          buf_si[0]:                 nrows to be sent
4851                [1:nrows]:           row index (global)
4852                [nrows+1:2*nrows+1]: i-structure index
4853     */
4854     /*-------------------------------------------*/
4855     nrows       = len_si[proc]/2 - 1;
4856     buf_si_i    = buf_si + nrows+1;
4857     buf_si[0]   = nrows;
4858     buf_si_i[0] = 0;
4859     nrows       = 0;
4860     for (i=owners[proc]; i<owners[proc+1]; i++) {
4861       anzi = ai[i+1] - ai[i];
4862       if (anzi) {
4863         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4864         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4865         nrows++;
4866       }
4867     }
4868     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4869     k++;
4870     buf_si += len_si[proc];
4871   }
4872 
4873   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4874   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4875 
4876   ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4877   for (i=0; i<merge->nrecv; i++) {
4878     ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4879   }
4880 
4881   ierr = PetscFree(len_si);CHKERRQ(ierr);
4882   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4883   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4884   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4885   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4886   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4887   ierr = PetscFree(status);CHKERRQ(ierr);
4888 
4889   /* compute a local seq matrix in each processor */
4890   /*----------------------------------------------*/
4891   /* allocate bi array and free space for accumulating nonzero column info */
4892   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4893   bi[0] = 0;
4894 
4895   /* create and initialize a linked list */
4896   nlnk = N+1;
4897   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4898 
4899   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4900   len  = ai[owners[rank+1]] - ai[owners[rank]];
4901   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4902 
4903   current_space = free_space;
4904 
4905   /* determine symbolic info for each local row */
4906   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4907 
4908   for (k=0; k<merge->nrecv; k++) {
4909     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4910     nrows       = *buf_ri_k[k];
4911     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4912     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4913   }
4914 
4915   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4916   len  = 0;
4917   for (i=0; i<m; i++) {
4918     bnzi = 0;
4919     /* add local non-zero cols of this proc's seqmat into lnk */
4920     arow  = owners[rank] + i;
4921     anzi  = ai[arow+1] - ai[arow];
4922     aj    = a->j + ai[arow];
4923     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4924     bnzi += nlnk;
4925     /* add received col data into lnk */
4926     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4927       if (i == *nextrow[k]) { /* i-th row */
4928         anzi  = *(nextai[k]+1) - *nextai[k];
4929         aj    = buf_rj[k] + *nextai[k];
4930         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4931         bnzi += nlnk;
4932         nextrow[k]++; nextai[k]++;
4933       }
4934     }
4935     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4936 
4937     /* if free space is not available, make more free space */
4938     if (current_space->local_remaining<bnzi) {
4939       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4940       nspacedouble++;
4941     }
4942     /* copy data into free space, then initialize lnk */
4943     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4944     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4945 
4946     current_space->array           += bnzi;
4947     current_space->local_used      += bnzi;
4948     current_space->local_remaining -= bnzi;
4949 
4950     bi[i+1] = bi[i] + bnzi;
4951   }
4952 
4953   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4954 
4955   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4956   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4957   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4958 
4959   /* create symbolic parallel matrix B_mpi */
4960   /*---------------------------------------*/
4961   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4962   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4963   if (n==PETSC_DECIDE) {
4964     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4965   } else {
4966     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4967   }
4968   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4969   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4970   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4971   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4972   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4973 
4974   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4975   B_mpi->assembled  = PETSC_FALSE;
4976   merge->bi         = bi;
4977   merge->bj         = bj;
4978   merge->buf_ri     = buf_ri;
4979   merge->buf_rj     = buf_rj;
4980   merge->coi        = NULL;
4981   merge->coj        = NULL;
4982   merge->owners_co  = NULL;
4983 
4984   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4985 
4986   /* attach the supporting struct to B_mpi for reuse */
4987   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4988   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4989   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4990   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4991   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4992   *mpimat = B_mpi;
4993 
4994   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4995   PetscFunctionReturn(0);
4996 }
4997 
4998 /*@C
4999       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5000                  matrices from each processor
5001 
5002     Collective
5003 
5004    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
5007 .    m - number of local rows (or PETSC_DECIDE)
5008 .    n - number of local columns (or PETSC_DECIDE)
5009 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010 
5011    Output Parameter:
5012 .    mpimat - the parallel matrix generated
5013 
5014     Level: advanced
5015 
5016    Notes:
5017      The dimensions of the sequential matrix in each processor MUST be the same.
5018      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5019      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5020 @*/
5021 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5022 {
5023   PetscErrorCode ierr;
5024   PetscMPIInt    size;
5025 
5026   PetscFunctionBegin;
5027   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5028   if (size == 1) {
5029     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5030     if (scall == MAT_INITIAL_MATRIX) {
5031       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5032     } else {
5033       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5034     }
5035     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5036     PetscFunctionReturn(0);
5037   }
5038   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5039   if (scall == MAT_INITIAL_MATRIX) {
5040     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5041   }
5042   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5043   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5044   PetscFunctionReturn(0);
5045 }
5046 
5047 /*@
5048      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5050           with MatGetSize()
5051 
5052     Not Collective
5053 
5054    Input Parameters:
5055 +    A - the matrix
5056 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5057 
5058    Output Parameter:
5059 .    A_loc - the local sequential matrix generated
5060 
5061     Level: developer
5062 
5063    Notes:
5064      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5065      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5066      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5067      modify the values of the returned A_loc.
5068 
5069 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5070 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray; /* cmap maps B's local column indices to global columns */
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* PetscStrbeginswith() (not an exact type compare) so that subtypes whose names start with "mpiaij" also pass */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* uniprocessor: the diagonal block already IS the local matrix; return a reference or copy values in place */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  /* aa/ba are walking cursors; aav/bav keep the pointers originally returned, for the restore calls below */
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds every entry of row i from both the diagonal (A) and off-diagonal (B) blocks */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    /* merge each row in ascending global column order: B entries left of the diagonal block,
       then the A (diagonal) entries, then the remaining B entries.  This relies on cmap[bj[...]]
       being increasing within a row, hence the early break at cstart. */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local column indices are shifted by cstart to become global) */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero pattern is unchanged: just refill the values, walking in exactly the same merged order as above */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    /* NOTE(review): cam has been advanced past the array here; plain SeqAIJ's restore does not look at the
       value, but this would matter for implementations that do — confirm */
    ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5172 
5173 /*@
5174      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and offdiagonal parts
5176 
5177     Not Collective
5178 
5179    Input Parameters:
5180 +    A - the matrix
5181 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5182 
5183    Output Parameters:
5184 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5185 -    A_loc - the local sequential matrix generated
5186 
5187     Level: developer
5188 
5189    Notes:
5190      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5191 
5192 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5193 
5194 @*/
5195 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5196 {
5197   PetscErrorCode ierr;
5198   Mat            Ao,Ad;
5199   const PetscInt *cmap;
5200   PetscMPIInt    size;
5201   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5202 
5203   PetscFunctionBegin;
5204   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5205   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5206   if (size == 1) {
5207     if (scall == MAT_INITIAL_MATRIX) {
5208       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5209       *A_loc = Ad;
5210     } else if (scall == MAT_REUSE_MATRIX) {
5211       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5212     }
5213     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5214     PetscFunctionReturn(0);
5215   }
5216   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5217   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5218   if (f) {
5219     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5220   } else {
5221     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5222     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5223     Mat_SeqAIJ        *c;
5224     PetscInt          *ai = a->i, *aj = a->j;
5225     PetscInt          *bi = b->i, *bj = b->j;
5226     PetscInt          *ci,*cj;
5227     const PetscScalar *aa,*ba;
5228     PetscScalar       *ca;
5229     PetscInt          i,j,am,dn,on;
5230 
5231     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5232     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5233     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5234     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5235     if (scall == MAT_INITIAL_MATRIX) {
5236       PetscInt k;
5237       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5238       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5239       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5240       ci[0] = 0;
5241       for (i=0,k=0; i<am; i++) {
5242         const PetscInt ncols_o = bi[i+1] - bi[i];
5243         const PetscInt ncols_d = ai[i+1] - ai[i];
5244         ci[i+1] = ci[i] + ncols_o + ncols_d;
5245         /* diagonal portion of A */
5246         for (j=0; j<ncols_d; j++,k++) {
5247           cj[k] = *aj++;
5248           ca[k] = *aa++;
5249         }
5250         /* off-diagonal portion of A */
5251         for (j=0; j<ncols_o; j++,k++) {
5252           cj[k] = dn + *bj++;
5253           ca[k] = *ba++;
5254         }
5255       }
5256       /* put together the new matrix */
5257       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5258       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5259       /* Since these are PETSc arrays, change flags to free them as necessary. */
5260       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5261       c->free_a  = PETSC_TRUE;
5262       c->free_ij = PETSC_TRUE;
5263       c->nonew   = 0;
5264       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5265     } else if (scall == MAT_REUSE_MATRIX) {
5266       ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5267       for (i=0; i<am; i++) {
5268         const PetscInt ncols_d = ai[i+1] - ai[i];
5269         const PetscInt ncols_o = bi[i+1] - bi[i];
5270         /* diagonal portion of A */
5271         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5272         /* off-diagonal portion of A */
5273         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5274       }
5275       ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5276     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5277     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5278     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5279     if (glob) {
5280       PetscInt cst, *gidx;
5281 
5282       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5283       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5284       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5285       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5286       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5287     }
5288   }
5289   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5290   PetscFunctionReturn(0);
5291 }
5292 
5293 /*@C
5294      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5295 
5296     Not Collective
5297 
5298    Input Parameters:
5299 +    A - the matrix
5300 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5301 -    row, col - index sets of rows and columns to extract (or NULL)
5302 
5303    Output Parameter:
5304 .    A_loc - the local sequential matrix generated
5305 
5306     Level: developer
5307 
5308 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5309 
5310 @*/
5311 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5312 {
5313   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5314   PetscErrorCode ierr;
5315   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5316   IS             isrowa,iscola;
5317   Mat            *aloc;
5318   PetscBool      match;
5319 
5320   PetscFunctionBegin;
5321   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5322   PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5323   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5324   if (!row) {
5325     start = A->rmap->rstart; end = A->rmap->rend;
5326     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5327   } else {
5328     isrowa = *row;
5329   }
5330   if (!col) {
5331     start = A->cmap->rstart;
5332     cmap  = a->garray;
5333     nzA   = a->A->cmap->n;
5334     nzB   = a->B->cmap->n;
5335     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5336     ncols = 0;
5337     for (i=0; i<nzB; i++) {
5338       if (cmap[i] < start) idx[ncols++] = cmap[i];
5339       else break;
5340     }
5341     imark = i;
5342     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5343     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5344     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5345   } else {
5346     iscola = *col;
5347   }
5348   if (scall != MAT_INITIAL_MATRIX) {
5349     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5350     aloc[0] = *A_loc;
5351   }
5352   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5353   if (!col) { /* attach global id of condensed columns */
5354     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5355   }
5356   *A_loc = aloc[0];
5357   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5358   if (!row) {
5359     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5360   }
5361   if (!col) {
5362     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5363   }
5364   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5365   PetscFunctionReturn(0);
5366 }
5367 
5368 /*
 * Create a sequential AIJ matrix based on row indices: for each matched row, the whole row (all of its nonzero columns) is extracted.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5371  * on a global size.
5372  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Phase 1 SF: one root per local row of P, one leaf per requested row.
   * Used to communicate how many nonzero columns each requested row has. */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* per local row: [diag count, off-diag count] pairs plus running offsets into pd/po data */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Phase 2 SFs: one leaf per received nonzero (diag and off-diag handled separately) */
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix.
   * NOTE: pd->j is mutated in place and restored after the matching PetscSFBcastEnd(sf,...) below,
   * so P's internal data is unchanged once this routine returns. */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* po->j is likewise mapped to global indices in place and mapped back with
   * ISGlobalToLocalMappingApply() once the off-diagonal broadcast has completed */
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5542 
5543 /*
5544  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5545  * This supports MPIAIJ and MAIJ
5546  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;      /* hash map used to deduplicate the needed global rows of P */
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;    /* star forests attached to *P_oth by the initial-matrix path */
  PetscBool             has;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof; /* for dof>1 (MAIJ), 'dof' consecutive global columns presumably collapse to one P row -- TODO confirm */
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted) */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    /* Keys come out of the hash map unordered; sort them to obtain the row IS */
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    /* Keep the A-offdiag-column -> P_oth-row mapping around for later product routines */
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier (composed as "diagsf"/"offdiagsf").
     */
    const PetscScalar *pd_a,*po_a;

    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place: both broadcasts target p_oth->a; the two SFs were built to
       cover disjoint entries of it -- set up by the initial-matrix path */
    ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5621 
5622 /*@C
  MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5624 
5625   Collective on Mat
5626 
5627   Input Parameters:
5628 + A - the first matrix in mpiaij format
5629 . B - the second matrix in mpiaij format
5630 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5631 
5632   Output Parameters:
5633 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5634 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5635 - B_seq - the sequential matrix generated
5636 
5637   Level: developer
5638 
5639 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL; /* MatCreateSubMatrices() works on an array of Mat */

  PetscFunctionBegin;
  /* A's local column ownership must coincide with B's local row ownership for the row extraction to be meaningful */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of needed B rows: off-diagonal columns of A (cmap = a->garray,
       assumed sorted) merged around the locally owned column range [start, start+nzA) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); /* take all columns of B */
  } else {
    PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    /* wrap the reused matrix in the one-element array expected by MatCreateSubMatrices() */
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Hand the index sets back to the caller for later reuse, or destroy them if not requested */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5692 
5693 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5695     of the OFF-DIAGONAL portion of local A
5696 
5697     Collective on Mat
5698 
5699    Input Parameters:
5700 +    A,B - the matrices in mpiaij format
5701 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5702 
5703    Output Parameter:
5704 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5705 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5706 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5707 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5708 
5709     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5710      for this matrix. This is not desirable..
5711 
5712     Level: developer
5713 
5714 */
5715 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5716 {
5717   PetscErrorCode         ierr;
5718   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5719   Mat_SeqAIJ             *b_oth;
5720   VecScatter             ctx;
5721   MPI_Comm               comm;
5722   const PetscMPIInt      *rprocs,*sprocs;
5723   const PetscInt         *srow,*rstarts,*sstarts;
5724   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5725   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5726   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5727   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5728   PetscMPIInt            size,tag,rank,nreqs;
5729 
5730   PetscFunctionBegin;
5731   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5732   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5733 
5734   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5735     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5736   }
5737   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5738   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5739 
5740   if (size == 1) {
5741     startsj_s = NULL;
5742     bufa_ptr  = NULL;
5743     *B_oth    = NULL;
5744     PetscFunctionReturn(0);
5745   }
5746 
5747   ctx = a->Mvctx;
5748   tag = ((PetscObject)ctx)->tag;
5749 
5750   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5751   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5752   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5753   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5754   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5755   rwaits = reqs;
5756   swaits = reqs + nrecvs;
5757 
5758   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5759   if (scall == MAT_INITIAL_MATRIX) {
5760     /* i-array */
5761     /*---------*/
5762     /*  post receives */
5763     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5764     for (i=0; i<nrecvs; i++) {
5765       rowlen = rvalues + rstarts[i]*rbs;
5766       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5767       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5768     }
5769 
5770     /* pack the outgoing message */
5771     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5772 
5773     sstartsj[0] = 0;
5774     rstartsj[0] = 0;
5775     len         = 0; /* total length of j or a array to be sent */
5776     if (nsends) {
5777       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5778       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5779     }
5780     for (i=0; i<nsends; i++) {
5781       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5782       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5783       for (j=0; j<nrows; j++) {
5784         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5785         for (l=0; l<sbs; l++) {
5786           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5787 
5788           rowlen[j*sbs+l] = ncols;
5789 
5790           len += ncols;
5791           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5792         }
5793         k++;
5794       }
5795       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5796 
5797       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5798     }
5799     /* recvs and sends of i-array are completed */
5800     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5801     ierr = PetscFree(svalues);CHKERRQ(ierr);
5802 
5803     /* allocate buffers for sending j and a arrays */
5804     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5805     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5806 
5807     /* create i-array of B_oth */
5808     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5809 
5810     b_othi[0] = 0;
5811     len       = 0; /* total length of j or a array to be received */
5812     k         = 0;
5813     for (i=0; i<nrecvs; i++) {
5814       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5815       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5816       for (j=0; j<nrows; j++) {
5817         b_othi[k+1] = b_othi[k] + rowlen[j];
5818         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5819         k++;
5820       }
5821       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5822     }
5823     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5824 
5825     /* allocate space for j and a arrrays of B_oth */
5826     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5827     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5828 
5829     /* j-array */
5830     /*---------*/
5831     /*  post receives of j-array */
5832     for (i=0; i<nrecvs; i++) {
5833       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5834       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5835     }
5836 
5837     /* pack the outgoing message j-array */
5838     if (nsends) k = sstarts[0];
5839     for (i=0; i<nsends; i++) {
5840       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5841       bufJ  = bufj+sstartsj[i];
5842       for (j=0; j<nrows; j++) {
5843         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5844         for (ll=0; ll<sbs; ll++) {
5845           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5846           for (l=0; l<ncols; l++) {
5847             *bufJ++ = cols[l];
5848           }
5849           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5850         }
5851       }
5852       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5853     }
5854 
5855     /* recvs and sends of j-array are completed */
5856     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5857   } else if (scall == MAT_REUSE_MATRIX) {
5858     sstartsj = *startsj_s;
5859     rstartsj = *startsj_r;
5860     bufa     = *bufa_ptr;
5861     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5862     ierr     = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5863   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5864 
5865   /* a-array */
5866   /*---------*/
5867   /*  post receives of a-array */
5868   for (i=0; i<nrecvs; i++) {
5869     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5870     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5871   }
5872 
5873   /* pack the outgoing message a-array */
5874   if (nsends) k = sstarts[0];
5875   for (i=0; i<nsends; i++) {
5876     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5877     bufA  = bufa+sstartsj[i];
5878     for (j=0; j<nrows; j++) {
5879       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5880       for (ll=0; ll<sbs; ll++) {
5881         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5882         for (l=0; l<ncols; l++) {
5883           *bufA++ = vals[l];
5884         }
5885         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5886       }
5887     }
5888     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5889   }
5890   /* recvs and sends of a-array are completed */
5891   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5892   ierr = PetscFree(reqs);CHKERRQ(ierr);
5893 
5894   if (scall == MAT_INITIAL_MATRIX) {
5895     /* put together the new matrix */
5896     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5897 
5898     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5899     /* Since these are PETSc arrays, change flags to free them as necessary. */
5900     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5901     b_oth->free_a  = PETSC_TRUE;
5902     b_oth->free_ij = PETSC_TRUE;
5903     b_oth->nonew   = 0;
5904 
5905     ierr = PetscFree(bufj);CHKERRQ(ierr);
5906     if (!startsj_s || !bufa_ptr) {
5907       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5908       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5909     } else {
5910       *startsj_s = sstartsj;
5911       *startsj_r = rstartsj;
5912       *bufa_ptr  = bufa;
5913     }
5914   } else if (scall == MAT_REUSE_MATRIX) {
5915     ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5916   }
5917 
5918   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5919   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5920   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5921   PetscFunctionReturn(0);
5922 }
5923 
5924 /*@C
5925   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5926 
5927   Not Collective
5928 
5929   Input Parameter:
5930 . A - The matrix in mpiaij format
5931 
5932   Output Parameters:
5933 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5934 . colmap - A map from global column index to local index into lvec
5935 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5936 
5937   Level: developer
5938 
5939 @*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  /* Borrowed pointers: no new references are taken here, so callers must not destroy them.
     NOTE(review): the PetscValidPointer() checks above already reject NULL arguments,
     so the guards below can never be false; kept byte-for-byte for safety. */
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
5959 
5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5963 #if defined(PETSC_HAVE_MKL_SPARSE)
5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5965 #endif
5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5967 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5968 #if defined(PETSC_HAVE_ELEMENTAL)
5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5970 #endif
5971 #if defined(PETSC_HAVE_SCALAPACK)
5972 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5973 #endif
5974 #if defined(PETSC_HAVE_HYPRE)
5975 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5976 #endif
5977 #if defined(PETSC_HAVE_CUDA)
5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5979 #endif
5980 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5982 #endif
5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5984 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5985 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5986 
5987 /*
5988     Computes (B'*A')' since computing B*A directly is untenable
5989 
5990                n                       p                          p
5991         [             ]       [             ]         [                 ]
5992       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5993         [             ]       [             ]         [                 ]
5994 
5995 */
5996 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5997 {
5998   PetscErrorCode ierr;
5999   Mat            At,Bt,Ct;
6000 
6001   PetscFunctionBegin;
6002   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
6003   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
6004   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
6005   ierr = MatDestroy(&At);CHKERRQ(ierr);
6006   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
6007   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
6008   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
6009   PetscFunctionReturn(0);
6010 }
6011 
6012 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6013 {
6014   PetscErrorCode ierr;
6015   PetscBool      cisdense;
6016 
6017   PetscFunctionBegin;
6018   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6019   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6020   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6021   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6022   if (!cisdense) {
6023     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6024   }
6025   ierr = MatSetUp(C);CHKERRQ(ierr);
6026 
6027   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6028   PetscFunctionReturn(0);
6029 }
6030 
6031 /* ----------------------------------------------------------------*/
6032 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6033 {
6034   Mat_Product *product = C->product;
6035   Mat         A = product->A,B=product->B;
6036 
6037   PetscFunctionBegin;
6038   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6039     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6040 
6041   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6042   C->ops->productsymbolic = MatProductSymbolic_AB;
6043   PetscFunctionReturn(0);
6044 }
6045 
6046 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6047 {
6048   PetscErrorCode ierr;
6049   Mat_Product    *product = C->product;
6050 
6051   PetscFunctionBegin;
6052   if (product->type == MATPRODUCT_AB) {
6053     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6054   }
6055   PetscFunctionReturn(0);
6056 }
6057 
6058 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
6059    is greater than value, or last if there is no such element.
6060 */
6061 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
6062 {
6063   PetscCount  it,step,count = last - first;
6064 
6065   PetscFunctionBegin;
6066   while (count > 0) {
6067     it   = first;
6068     step = count / 2;
6069     it  += step;
6070     if (!(value < array[it])) {
6071       first  = ++it;
6072       count -= step + 1;
6073     } else count = step;
6074   }
6075   *upper = first;
6076   PetscFunctionReturn(0);
6077 }
6078 
6079 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
6080 
6081   Input Parameters:
6082 
6083     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6084     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6085 
6086     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
6087 
6088     For Set1, j1[] contains column indices of the nonzeros.
6089     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6090     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6091     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6092 
6093     Similar for Set2.
6094 
6095     This routine merges the two sets of nonzeros row by row and removes repeats.
6096 
6097   Output Parameters: (memories are allocated by the caller)
6098 
6099     i[],j[]: the CSR of the merged matrix, which has m rows.
6100     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6101     imap2[]: similar to imap1[], but for Set2.
6102     Note we order nonzeros row-by-row and from left to right.
6103 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscErrorCode ierr;
  PetscInt       r,m; /* Row index of mat */
  PetscCount     t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  t1   = t2 = t = 0; /* Running counts of unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Classic two-way merge over the sorted (possibly repeated) column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's entry is smaller; it alone produces the merged nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Set2's entry is smaller */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: row r+1 of the merged matrix starts at position t of j[] */
  }
  PetscFunctionReturn(0);
}
6158 
6159 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6160 
6161   Input Parameters:
6162     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6163     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6164       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6165 
6166       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6167       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6168 
6169   Output Parameters:
6170     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6171     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6172       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6173       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6174 
6175     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6176       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6177         repeats (i.e., same 'i,j' pair).
6178       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6179         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6180 
6181       Atot: number of entries belonging to the diagonal block
6182       Annz: number of unique nonzeros belonging to the diagonal block.
6183 
6184     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6185 
6186     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6187 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscErrorCode    ierr;
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
  m    = rend - rstart;

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr);
    ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p]; /* col is still shifted (negative); equal shifted values mean equal original columns */
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s; /* advance to the next local row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr);

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    ierr  = PetscArraycpy(Aperm+Atot,perm+k,  mid-k);CHKERRQ(ierr);
    ierr  = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr);
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* prefix sum of repeat counts */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output; the four arrays were obtained from a single PetscMalloc4() and must be freed with PetscFree4() */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6285 
6286 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6287 {
6288   PetscErrorCode            ierr;
6289   MPI_Comm                  comm;
6290   PetscMPIInt               rank,size;
6291   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6292   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6293   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6294 
6295   PetscFunctionBegin;
6296   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
6297   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
6298   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
6299   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
6300   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
6301   ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6302   ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6303   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
6304   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
6305 
6306   /* ---------------------------------------------------------------------------*/
6307   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6308   /* entries come first, then local rows, then remote rows.                     */
6309   /* ---------------------------------------------------------------------------*/
6310   PetscCount n1 = coo_n,*perm1;
6311   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6312   ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr);
6313   ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */
6314   ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr);
6315   for (k=0; k<n1; k++) perm1[k] = k;
6316 
6317   /* Manipulate indices so that entries with negative row or col indices will have smallest
6318      row indices, local entries will have greater but negative row indices, and remote entries
6319      will have positive row indices.
6320   */
6321   for (k=0; k<n1; k++) {
6322     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6323     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6324     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6325     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6326   }
6327 
6328   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6329   ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr);
6330   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6331   ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */
6332   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6333 
6334   /* ---------------------------------------------------------------------------*/
6335   /*           Split local rows into diag/offdiag portions                      */
6336   /* ---------------------------------------------------------------------------*/
6337   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6338   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6339   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6340 
6341   ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr);
6342   ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr);
6343   ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr);
6344 
6345   /* ---------------------------------------------------------------------------*/
6346   /*           Send remote rows to their owner                                  */
6347   /* ---------------------------------------------------------------------------*/
6348   /* Find which rows should be sent to which remote ranks*/
6349   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6350   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6351   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6352   const PetscInt *ranges;
6353   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6354 
6355   ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr);
6356   ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr);
6357   for (k=rem; k<n1;) {
6358     PetscMPIInt  owner;
6359     PetscInt     firstRow,lastRow;
6360     /* Locate a row range */
6361     firstRow = i1[k]; /* first row of this owner */
6362     ierr     = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr);
6363     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6364 
6365     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6366     ierr     = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr);
6367 
6368     /* All entries in [k,p) belong to this remote owner */
6369     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6370       PetscMPIInt *sendto2;
6371       PetscInt    *nentries2;
6372       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6373       ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr);
6374       ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr);
6375       ierr = PetscArraycpy(nentries2,nentries2,maxNsend+1);CHKERRQ(ierr);
6376       ierr = PetscFree2(sendto,nentries2);CHKERRQ(ierr);
6377       sendto      = sendto2;
6378       nentries    = nentries2;
6379       maxNsend    = maxNsend2;
6380     }
6381     sendto[nsend]   = owner;
6382     nentries[nsend] = p - k;
6383     ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr);
6384     nsend++;
6385     k = p;
6386   }
6387 
6388   /* Build 1st SF to know offsets on remote to send data */
6389   PetscSF     sf1;
6390   PetscInt    nroots = 1,nroots2 = 0;
6391   PetscInt    nleaves = nsend,nleaves2 = 0;
6392   PetscInt    *offsets;
6393   PetscSFNode *iremote;
6394 
6395   ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr);
6396   ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr);
6397   ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr);
6398   for (k=0; k<nsend; k++) {
6399     iremote[k].rank  = sendto[k];
6400     iremote[k].index = 0;
6401     nleaves2        += nentries[k];
6402     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6403   }
6404   ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6405   ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr);
6406   ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* Would nroots2 overflow, we check offsets[] below */
6407   ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr);
6408   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6409 
6410   /* Build 2nd SF to send remote COOs to their owner */
6411   PetscSF sf2;
6412   nroots  = nroots2;
6413   nleaves = nleaves2;
6414   ierr    = PetscSFCreate(comm,&sf2);CHKERRQ(ierr);
6415   ierr    = PetscSFSetFromOptions(sf2);CHKERRQ(ierr);
6416   ierr    = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
6417   p       = 0;
6418   for (k=0; k<nsend; k++) {
6419     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6420     for (q=0; q<nentries[k]; q++,p++) {
6421       iremote[p].rank  = sendto[k];
6422       iremote[p].index = offsets[k] + q;
6423     }
6424   }
6425   ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6426 
6427   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
6428   ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr);
6429 
6430   /* Send the remote COOs to their owner */
6431   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6432   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6433   ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr);
6434   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr);
6435   ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr);
6436   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr);
6437   ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr);
6438 
6439   ierr = PetscFree(offsets);CHKERRQ(ierr);
6440   ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6441 
6442   /* ---------------------------------------------------------------*/
6443   /* Sort received COOs by row along with the permutation array     */
6444   /* ---------------------------------------------------------------*/
6445   for (k=0; k<n2; k++) perm2[k] = k;
6446   ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr);
6447 
6448   /* ---------------------------------------------------------------*/
6449   /* Split received COOs into diag/offdiag portions                 */
6450   /* ---------------------------------------------------------------*/
6451   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6452   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6453   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6454 
6455   ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr);
6456   ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr);
6457 
6458   /* --------------------------------------------------------------------------*/
6459   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6460   /* --------------------------------------------------------------------------*/
6461   PetscInt   *Ai,*Bi;
6462   PetscInt   *Aj,*Bj;
6463 
6464   ierr  = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr);
6465   ierr  = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr);
6466   ierr  = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */
6467   ierr  = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr);
6468 
6469   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6470   ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr);
6471 
6472   ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr);
6473   ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr);
6474   ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr);
6475   ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr);
6476   ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr);
6477   ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr);
6478 
6479   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6480   PetscInt Annz = Ai[m];
6481   PetscInt Bnnz = Bi[m];
6482   if (Annz < Annz1 + Annz2) {
6483     PetscInt *Aj_new;
6484     ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr);
6485     ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr);
6486     ierr = PetscFree(Aj);CHKERRQ(ierr);
6487     Aj   = Aj_new;
6488   }
6489 
6490   if (Bnnz < Bnnz1 + Bnnz2) {
6491     PetscInt *Bj_new;
6492     ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr);
6493     ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr);
6494     ierr = PetscFree(Bj);CHKERRQ(ierr);
6495     Bj   = Bj_new;
6496   }
6497 
6498   /* --------------------------------------------------------------------------------*/
6499   /* Create a MPIAIJKOKKOS newmat with CSRs of A and B, then replace mat with newmat */
6500   /* --------------------------------------------------------------------------------*/
6501   Mat           newmat;
6502   PetscScalar   *Aa,*Ba;
6503   Mat_SeqAIJ    *a,*b;
6504 
6505   ierr   = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Zero matrix on device */
6506   ierr   = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr);
6507   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6508   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6509   ierr   = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,Ai,Aj,Aa,Bi,Bj,Ba,&newmat);CHKERRQ(ierr); /* FIXME: Can we do it without creating a new mat? */
6510   ierr   = MatHeaderMerge(mat,&newmat);CHKERRQ(ierr); /* Unlike MatHeaderReplace(), some info, ex. mat->product is kept */
6511   mpiaij = (Mat_MPIAIJ*)mat->data;
6512   a      = (Mat_SeqAIJ*)mpiaij->A->data;
6513   b      = (Mat_SeqAIJ*)mpiaij->B->data;
6514   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6515   a->free_a       = b->free_a       = PETSC_TRUE;
6516   a->free_ij      = b->free_ij      = PETSC_TRUE;
6517 
6518   mpiaij->coo_n   = coo_n;
6519   mpiaij->coo_sf  = sf2;
6520   mpiaij->sendlen = nleaves;
6521   mpiaij->recvlen = nroots;
6522 
6523   mpiaij->Annz1   = Annz1;
6524   mpiaij->Annz2   = Annz2;
6525   mpiaij->Bnnz1   = Bnnz1;
6526   mpiaij->Bnnz2   = Bnnz2;
6527 
6528   mpiaij->Atot1   = Atot1;
6529   mpiaij->Atot2   = Atot2;
6530   mpiaij->Btot1   = Btot1;
6531   mpiaij->Btot2   = Btot2;
6532 
6533   mpiaij->Aimap1  = Aimap1;
6534   mpiaij->Aimap2  = Aimap2;
6535   mpiaij->Bimap1  = Bimap1;
6536   mpiaij->Bimap2  = Bimap2;
6537 
6538   mpiaij->Ajmap1  = Ajmap1;
6539   mpiaij->Ajmap2  = Ajmap2;
6540   mpiaij->Bjmap1  = Bjmap1;
6541   mpiaij->Bjmap2  = Bjmap2;
6542 
6543   mpiaij->Aperm1  = Aperm1;
6544   mpiaij->Aperm2  = Aperm2;
6545   mpiaij->Bperm1  = Bperm1;
6546   mpiaij->Bperm2  = Bperm2;
6547 
6548   mpiaij->Cperm1  = Cperm1;
6549 
6550   /* Allocate in preallocation. If not used, it has zero cost on host */
6551   ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr);
6552   PetscFunctionReturn(0);
6553 }
6554 
6555 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6556 {
6557   PetscErrorCode       ierr;
6558   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6559   Mat                  A = mpiaij->A,B = mpiaij->B;
6560   PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
6561   PetscScalar          *Aa,*Ba;
6562   PetscScalar          *sendbuf = mpiaij->sendbuf;
6563   PetscScalar          *recvbuf = mpiaij->recvbuf;
6564   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
6565   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
6566   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6567   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6568 
6569   PetscFunctionBegin;
6570   ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */
6571   ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr);
6572   if (imode == INSERT_VALUES) {
6573     ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6574     ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6575   }
6576 
6577   /* Pack entries to be sent to remote */
6578   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6579 
6580   /* Send remote entries to their owner and overlap the communication with local computation */
6581   ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6582   /* Add local entries to A and B */
6583   for (PetscCount i=0; i<Annz1; i++) {
6584     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
6585   }
6586   for (PetscCount i=0; i<Bnnz1; i++) {
6587     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
6588   }
6589   ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6590 
6591   /* Add received remote entries to A and B */
6592   for (PetscCount i=0; i<Annz2; i++) {
6593     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6594   }
6595   for (PetscCount i=0; i<Bnnz2; i++) {
6596     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6597   }
6598   ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr);
6599   ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr);
6600   PetscFunctionReturn(0);
6601 }
6602 
6603 /* ----------------------------------------------------------------*/
6604 
6605 /*MC
6606    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6607 
6608    Options Database Keys:
6609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6610 
6611    Level: beginner
6612 
6613    Notes:
6614     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6615     in this case the values associated with the rows and columns one passes in are set to zero
6616     in the matrix
6617 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6620 
6621 .seealso: MatCreateAIJ()
6622 M*/
6623 
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data structure, installs the
   function table, and composes the type-specific methods (conversions, preallocation,
   COO assembly, products) on the object so they can be dispatched by name. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  /* Attach the per-type data structure and the shared operations table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Compose the type-specific methods queried by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* Conversions to related formats; GPU/third-party targets are guarded by configure-time flags */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  /* COO assembly interface (see MatSetPreallocationCOO_MPIAIJ/MatSetValuesCOO_MPIAIJ above) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6704 
6705 /*@C
6706      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6707          and "off-diagonal" part of the matrix in CSR format.
6708 
6709    Collective
6710 
6711    Input Parameters:
6712 +  comm - MPI communicator
6713 .  m - number of local rows (Cannot be PETSC_DECIDE)
6714 .  n - This value should be the same as the local size used in creating the
6715        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6716        calculated if N is given) For square matrices n is almost always m.
6717 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6718 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6719 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6720 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6721 .   a - matrix values
6722 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6723 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6724 -   oa - matrix values
6725 
6726    Output Parameter:
6727 .   mat - the matrix
6728 
6729    Level: advanced
6730 
6731    Notes:
6732        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6733        must free the arrays once the matrix has been destroyed and not before.
6734 
6735        The i and j indices are 0 based
6736 
6737        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6738 
6739        This sets local rows and cannot be used to set off-processor values.
6740 
6741        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6742        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6743        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6744        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6745        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6746        communication if it is known that only local entries will be set.
6747 
6748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6749           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6750 @*/
6751 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6752 {
6753   PetscErrorCode ierr;
6754   Mat_MPIAIJ     *maij;
6755 
6756   PetscFunctionBegin;
6757   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6758   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6759   PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6760   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6761   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6762   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6763   maij = (Mat_MPIAIJ*) (*mat)->data;
6764 
6765   (*mat)->preallocated = PETSC_TRUE;
6766 
6767   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6768   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6769 
6770   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6771   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6772 
6773   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6774   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6775   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6776   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6777   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6778   PetscFunctionReturn(0);
6779 }
6780 
/* Work context for backend (GPU/host) MPIAIJ matrix-matrix products; attached to
   C->product->data and freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r; /* send/receive offsets used when (re)fetching P_oth */
  PetscScalar *bufa;                 /* communication buffer for P_oth values */
  Mat         P_oth;                 /* off-process rows of P needed locally */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type (host/device) used for the SF-managed buffers above */

  /* customization */
  PetscBool abmerge;    /* merge diag/offdiag of B when computing A*B */
  PetscBool P_oth_bind; /* bind P_oth to CPU memory */
} MatMatMPIAIJBACKEND;
6811 
6812 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6813 {
6814   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6815   PetscInt            i;
6816   PetscErrorCode      ierr;
6817 
6818   PetscFunctionBegin;
6819   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6820   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6821   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6822   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6823   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6824   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6825   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6826   for (i = 0; i < mmdata->cp; i++) {
6827     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6828   }
6829   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6830   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6831   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6832   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6833   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6834   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6835   PetscFunctionReturn(0);
6836 }
6837 
6838 /* Copy selected n entries with indices in idx[] of A to v[].
6839    If idx is NULL, copy the whole data array of A to v[]
6840  */
6841 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6842 {
6843   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6844   PetscErrorCode ierr;
6845 
6846   PetscFunctionBegin;
6847   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6848   if (f) {
6849     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6850   } else {
6851     const PetscScalar *vv;
6852 
6853     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6854     if (n && idx) {
6855       PetscScalar    *w = v;
6856       const PetscInt *oi = idx;
6857       PetscInt       j;
6858 
6859       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6860     } else {
6861       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6862     }
6863     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6864   }
6865   PetscFunctionReturn(0);
6866 }
6867 
/* Numeric phase of the backend MPIAIJ product: refresh the temporary
   matrices (P_oth, Bloc) when needed, run the numeric phase of every
   intermediate sequential product, pack the computed values into the COO
   buffers laid out by the symbolic phase, scatter off-process values with
   the product's PetscSF, and insert everything into C with
   MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process values) and coo_w (off-process send buffer) */
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only lets the first numeric call (right after symbolic) skip the
     update above; any subsequent call must refresh the temporaries */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* pack values into the COO buffers; the order of the segments must match
     the (i,j) layout produced by the symbolic phase */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* # of entries of mp[i] destined to other processes */

    if (mmdata->mptmp[i]) continue; /* temporary products are only inputs to later intermediates */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; /* # of entries of mp[i] inserted locally */

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* all entries are local: copy the whole value array of mp[i] */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* receive the values other processes computed for my rows; they land at
       the tail of coo_v, after the n_d locally computed entries */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6917 
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the COO-based backend implementation of MPIAIJ matrix
   products.

   The parallel product C is decomposed into at most MAX_NUMBER_INTERMEDIATE
   sequential products mp[] built from the diagonal (a->A, p->A) and
   off-diagonal (a->B, p->B) blocks of the operands and, when needed, from
   the gathered remote rows P_oth and/or the merged local matrix Bloc.  For
   each mp[] we record how its local rows/columns map to global rows/columns
   of C (rmapt/cmapt with tables rmapa/cmapa), expand all of its nonzeros
   into global (i,j) COO coordinates — scattering the entries owned by other
   processes with a PetscSF — and preallocate C with
   MatSetPreallocationCOO().  The numeric phase then only needs to copy
   values and call MatSetValuesCOO(). */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
  PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                        /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                        /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) { /* with symmetric A, A^t*B is computed as the cheaper A*B */
    ptype = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine operand roles, the local/global sizes of C, and whether some
     computed entries may belong to rows owned by other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A = product->A;
    P = product->B;
    m = A->rmap->n;
    n = P->cmap->n;
    M = A->rmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P = product->A;
    A = product->B;
    m = P->cmap->n;
    n = A->cmap->n;
    M = P->cmap->N;
    N = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A = product->A;
    P = product->B;
    m = P->cmap->n;
    n = P->cmap->n;
    M = P->cmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every entry is local */

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  ierr = PetscNew(&mmdata);CHKERRQ(ierr);
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  }
  /* set up C with the same type as A, so the intermediate products run on the same backend */
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
  ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);

  /* build the intermediate sequential products: each branch creates mp[cp],
     runs its symbolic phase, and records the row/column maps of its entries
     into C (and whether it is only a temporary input for later products) */
  cp   = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
      ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1; /* rows are the locally owned rows of C (offset by rstart) */
      cmapt[cp] = 2; /* columns map through the merged local-to-global table */
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 1; /* columns of P_diag are the owned columns of C (offset by cstart) */
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2; /* columns of P_off map through P's garray */
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2; /* columns map through the compacted local-to-global indices of P_oth */
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 2; /* both rows and columns map through the merged local-to-global table */
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2; /* rows of P_off^t map through P's garray, hence may be off-process */
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    /* P is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
    ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
    ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
    ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
    ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
    mp[cp]->product->api_user = product->api_user;
    ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
    PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
    ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      /* temporary T = A_off * P_oth; it is only an input to the next product (mptmp is set) */
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      mptmp[cp] = PETSC_TRUE;
      cp++;
      /* Bloc^t * T: rows map through globidx, columns through P_oth's indices */
      ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  /* attach the product data to C; note that mmdata->own/off are allocated further below */
  ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type: coo_v/coo_w are later allocated with PetscSFMalloc() using this type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
#endif

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii  = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
    ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
    ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii  = mm->i;
        PetscInt       *coi = coo_i + ncoo_o;
        PetscInt       *coj = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++ = gr;
              *idxoff++ = j; /* remember which local entry of mp[cp] feeds this off-process slot */
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      /* close the csr-like segments so off/own[cp+1]-off/own[cp] gives the per-product counts */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
    ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
    ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);

    /* an empty sf is still created so the numeric phase and the destructor can use it unconditionally */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi = coo_i + ncoo_d;
    PetscInt       *coj = coo_j + ncoo_d;
    const PetscInt *jj  = mm->j;
    const PetscInt *ii  = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&glob);CHKERRQ(ierr);
  if (P_oth_l2g) {
    ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);

  /* preallocate with COO data */
  ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
  ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
7428 
/*
  MatProductSetFromOptions_MPIAIJBACKEND - Choose the COO-based backend symbolic kernel
  (MatProductSymbolic_MPIAIJBACKEND) for C = A*B, A^t*B, or P^t*A*P products, or fall
  back to the generic MPIAIJ implementation.

  With PETSC_HAVE_DEVICE, the backend kernel is selected only when neither operand is
  bound to the CPU, both operands have the same matrix type, and the user has not
  requested the CPU code path via the per-product -*_backend_cpu options handled below.
  Without device support, the backend kernel is selected unconditionally for the three
  supported product types.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product    *product = mat->product;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool      match = PETSC_FALSE; /* set below when both operands stay on device and share a type */
  PetscBool      usecpu = PETSC_FALSE;
#else
  PetscBool      match = PETSC_TRUE;  /* no device build: always eligible for the backend kernel */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
  }
  if (match) { /* we can always fallback to the CPU if requested */
    /* Each supported product type exposes a boolean option forcing the CPU path; the
       option name depends on whether the user came through the legacy API entry point
       (MatMatMult() etc., product->api_user) or the MatProduct interface. */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
        ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* honor the user's request for the CPU code path */
  }
#endif
  if (match) {
    /* only these three product types have a backend COO implementation */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) {
    ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
7504 
7505 /*
7506     Special version for direct calls from Fortran
7507 */
7508 #include <petsc/private/fortranimpl.h>
7509 
7510 /* Change these macros so can be used in void function */
7511 /* Identical to CHKERRV, except it assigns to *_ierr */
7512 #undef CHKERRQ
7513 #define CHKERRQ(ierr) do {                                                                     \
7514     PetscErrorCode ierr_msv_mpiaij = (ierr);                                                   \
7515     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7516       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7517       return;                                                                                  \
7518     }                                                                                          \
7519   } while (0)
7520 
7521 #undef SETERRQ
7522 #define SETERRQ(comm,ierr,...) do {                                                            \
7523     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7524     return;                                                                                    \
7525   } while (0)
7526 
7527 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7528 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7529 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7530 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7531 #else
7532 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All arguments arrive as pointers per the Fortran calling convention: mm/mn are the row
  and column counts, im[]/in[] the global row/column indices, v[] the values, maddv the
  insert mode.  Errors are reported by assigning to *_ierr and returning, via the
  CHKERRQ/SETERRQ redefinitions earlier in this file.

  Values for locally owned rows are inserted directly into the diagonal block A (columns
  in [cstart,cend)) or the off-diagonal block B (all other columns) using the
  MatSetValues_SeqAIJ_{A,B}_Private macros; values for off-process rows are buffered in
  the stash for communication at assembly time (unless donotstash is set).
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  /* the first call fixes the insert mode; subsequent calls must not mix modes */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* A is the diagonal (locally owned column) block, B the off-diagonal block; the
       MatSetValues_SeqAIJ_{A,B}_Private macros reference these locals by name, so none
       of them may be renamed. */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscAssertFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
        row      = im[i] - rstart;
        /* set up the per-row search state (pointers, lengths, binary-search window)
           for both the A and B blocks, as expected by the insertion macros */
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) { /* locally owned column -> diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else { /* off-process column -> off-diagonal block B */
            if (mat->was_assembled) {
              /* translate the global column index to B's local index via the column map */
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not yet present in B: disassemble so it can be inserted */
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: buffer the values for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
    ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
  }
  PetscFunctionReturnVoid();
}
7647 /* Undefining these here since they were redefined from their original definition above! No
7648  * other PETSc functions should be defined past this point, as it is impossible to recover the
7649  * original definitions */
7650 #undef CHKERRQ
7651 #undef SETERRQ
7652