xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a873a8cd69acc6fd9b12ad3d6b30ee1bf0a81da9)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
   enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
79 {
80   PetscErrorCode  ierr;
81   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
82   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
83   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
84   const PetscInt  *ia,*ib;
85   const MatScalar *aa,*bb,*aav,*bav;
86   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
87   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
88 
89   PetscFunctionBegin;
90   *keptrows = NULL;
91 
92   ia   = a->i;
93   ib   = b->i;
94   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
95   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) {
100       cnt++;
101       goto ok1;
102     }
103     aa = aav + ia[i];
104     for (j=0; j<na; j++) {
105       if (aa[j] != 0.0) goto ok1;
106     }
107     bb = bav + ib[i];
108     for (j=0; j <nb; j++) {
109       if (bb[j] != 0.0) goto ok1;
110     }
111     cnt++;
112 ok1:;
113   }
114   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
115   if (!n0rows) {
116     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
117     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
118     PetscFunctionReturn(0);
119   }
120   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
121   cnt  = 0;
122   for (i=0; i<m; i++) {
123     na = ia[i+1] - ia[i];
124     nb = ib[i+1] - ib[i];
125     if (!na && !nb) continue;
126     aa = aav + ia[i];
127     for (j=0; j<na;j++) {
128       if (aa[j] != 0.0) {
129         rows[cnt++] = rstart + i;
130         goto ok2;
131       }
132     }
133     bb = bav + ib[i];
134     for (j=0; j<nb; j++) {
135       if (bb[j] != 0.0) {
136         rows[cnt++] = rstart + i;
137         goto ok2;
138       }
139     }
140 ok2:;
141   }
142   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
143   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,ReductionType type,PetscReal *reductions)
180 {
181   PetscErrorCode    ierr;
182   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
183   PetscInt          i,m,n,*garray = aij->garray;
184   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
185   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
186   PetscReal         *work;
187   const PetscScalar *dummy;
188 
189   PetscFunctionBegin;
190   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
191   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
192   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
193   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   if (type == REDUCTION_NORM_2) {
197     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
198       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
199     }
200     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
201       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
202     }
203   } else if (type == REDUCTION_NORM_1) {
204     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
205       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
206     }
207     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
208       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
209     }
210   } else if (type == REDUCTION_NORM_INFINITY) {
211     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
212       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
213     }
214     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
215       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
216     }
217   } else if (type == REDUCTION_SUM || type == REDUCTION_MEAN) {
218     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
219       work[A->cmap->rstart + a_aij->j[i]] += a_aij->a[i];
220     }
221     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
222       work[garray[b_aij->j[i]]] += b_aij->a[i];
223     }
224   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown ReductionType");
225   if (type == REDUCTION_NORM_INFINITY) {
226     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
227   } else {
228     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
229   }
230   ierr = PetscFree(work);CHKERRQ(ierr);
231   if (type == REDUCTION_NORM_2) {
232     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
233   } else if (type == REDUCTION_MEAN) {
234     for (i=0; i<n; i++) reductions[i] /= m;
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
240 {
241   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
242   IS              sis,gis;
243   PetscErrorCode  ierr;
244   const PetscInt  *isis,*igis;
245   PetscInt        n,*iis,nsis,ngis,rstart,i;
246 
247   PetscFunctionBegin;
248   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
249   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
250   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
251   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
252   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
253   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
254 
255   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
256   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
257   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
258   n    = ngis + nsis;
259   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
260   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
261   for (i=0; i<n; i++) iis[i] += rstart;
262   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
263 
264   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
265   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
266   ierr = ISDestroy(&sis);CHKERRQ(ierr);
267   ierr = ISDestroy(&gis);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 /*
272   Local utility routine that creates a mapping from the global column
273 number to the local number in the off-diagonal part of the local
274 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
275 a slightly higher hash table cost; without it it is not scalable (each processor
276 has an order N integer array but is fast to access.
277 */
278 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
279 {
280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
281   PetscErrorCode ierr;
282   PetscInt       n = aij->B->cmap->n,i;
283 
284   PetscFunctionBegin;
285   if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
286 #if defined(PETSC_USE_CTABLE)
287   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
288   for (i=0; i<n; i++) {
289     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
290   }
291 #else
292   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
293   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
294   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
295 #endif
296   PetscFunctionReturn(0);
297 }
298 
/*
   MatSetValues_SeqAIJ_A_Private - Inlined insertion of one value into the current row of the
   diagonal block.

   Macro arguments:
+  row,col   - row and column used to index the block's arrays
.  value     - the value to store or add
.  addv      - ADD_VALUES adds to an existing entry; any other mode overwrites it
-  orow,ocol - row/column numbers used only in the error message

   The macro is not self-contained. It reads and updates these variables of the expanding function:
   rp1/ap1 (column indices and values of the current row), nrow1 (entries currently in the row),
   rmax1, low1/high1 (search window), lastcol1, and also A, a, am, aa, ai, aj, aimax, ailen, nonew,
   ignorezeroentries, inserted, N, t, _i and ierr.

   Steps:
   1. The search window is reopened at the bottom (low1 = 0) when col is not larger than the previously
      handled column, otherwise at the top (high1 = nrow1); it is then halved until at most 5 candidates
      remain, which are scanned linearly.
   2. If the column is already present, the value is added or overwritten, inserted is set to
      PETSC_TRUE, and control jumps to a_noinsert.
   3. Otherwise nothing is inserted when the value is zero with ignorezeroentries set and row != col,
      or when nonew == 1; nonew == -1 raises an error.
   4. In the remaining case MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are
      shifted up by one, the new entry is stored at position _i, and a->nz and A->nonzerostate are
      incremented.
   5. ailen[row] is always updated to nrow1 at the end.

   The label a_noinsert is defined by the macro, so it can be expanded only once per function.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
336 
/*
   MatSetValues_SeqAIJ_B_Private - Inlined insertion of one value into the current row of the
   off-diagonal block.

   Macro arguments:
+  row,col   - row and column used to index the block's arrays
.  value     - the value to store or add
.  addv      - ADD_VALUES adds to an existing entry; any other mode overwrites it
-  orow,ocol - row/column numbers used only in the error message

   The macro is not self-contained. It reads and updates these variables of the expanding function:
   rp2/ap2 (column indices and values of the current row), nrow2 (entries currently in the row),
   rmax2, low2/high2 (search window), lastcol2, and also B, b, bm, ba, bi, bj, bimax, bilen, nonew,
   ignorezeroentries, inserted, N, t, _i and ierr.

   The steps mirror those of the macro for the diagonal block: narrow the search window, update an
   existing entry if the column is found (setting inserted and jumping to b_noinsert), otherwise
   skip, raise an error, or insert a new entry depending on value, ignorezeroentries and nonew.
   Unlike the diagonal-block macro, a zero value is skipped under ignorezeroentries without any
   row != col exception. bilen[row] is always updated to nrow2 at the end.

   The label b_noinsert is defined by the macro, so it can be expanded only once per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
373 
374 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
375 {
376   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
377   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
378   PetscErrorCode ierr;
379   PetscInt       l,*garray = mat->garray,diag;
380 
381   PetscFunctionBegin;
382   /* code only works for square matrices A */
383 
384   /* find size of row to the left of the diagonal part */
385   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
386   row  = row - diag;
387   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
388     if (garray[b->j[b->i[row]+l]] > diag) break;
389   }
390   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
391 
392   /* diagonal part */
393   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
394 
395   /* right of diagonal part */
396   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
397 #if defined(PETSC_HAVE_DEVICE)
398   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
399 #endif
400   PetscFunctionReturn(0);
401 }
402 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m by n block of values into an MPIAIJ matrix.

   Input Parameters:
+  mat  - the MPIAIJ matrix
.  m    - number of rows in the block
.  im   - global row indices; negative entries are skipped
.  n    - number of columns in the block
.  in   - global column indices; negative entries are skipped
.  v    - the values, read as v[i*n+j] when the matrix is row oriented and as v[i+j*m] otherwise;
          when v is NULL the local variable value is never assigned and keeps its initial 0.0
-  addv - ADD_VALUES or another insert mode, handed to the insertion macros

   Notes:
   Locally owned rows are handled immediately: columns inside the local column range go to the
   diagonal block through MatSetValues_SeqAIJ_A_Private(), all other valid columns go to the
   off-diagonal block through MatSetValues_SeqAIJ_B_Private().

   Rows owned by another process are stashed unless stashing is disabled; an error is raised for
   them when MAT_NO_OFF_PROC_ENTRIES was set.

   Row or column indices that are too large raise PETSC_ERR_ARG_OUTOFRANGE.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* NOTE(review): when a block is flagged as GPU-resident, a read access is taken and released before
     a->a / b->a are used -- presumably to refresh the host copy; confirm */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state consumed by the two insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* zeros being added are dropped, except where the global row and column indices coincide */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column inside the local column range: goes to the diagonal block with a shifted column index */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            /* after a previous assembly the global column is translated through colmap (built on demand) */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* column not in the map and new nonzeros are allowed: disassemble and use the global column index */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* column not in the map and new nonzeros are not allowed: log when nonew is 1, error otherwise */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* row owned by another process: stash it for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
530 
531 /*
532     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
533     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
534     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
535 */
536 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
537 {
538   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
539   Mat            A           = aij->A; /* diagonal part of the matrix */
540   Mat            B           = aij->B; /* offdiagonal part of the matrix */
541   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
542   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
543   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
544   PetscInt       *ailen      = a->ilen,*aj = a->j;
545   PetscInt       *bilen      = b->ilen,*bj = b->j;
546   PetscInt       am          = aij->A->rmap->n,j;
547   PetscInt       diag_so_far = 0,dnz;
548   PetscInt       offd_so_far = 0,onz;
549 
550   PetscFunctionBegin;
551   /* Iterate over all rows of the matrix */
552   for (j=0; j<am; j++) {
553     dnz = onz = 0;
554     /*  Iterate over all non-zero columns of the current row */
555     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
556       /* If column is in the diagonal */
557       if (mat_j[col] >= cstart && mat_j[col] < cend) {
558         aj[diag_so_far++] = mat_j[col] - cstart;
559         dnz++;
560       } else { /* off-diagonal entries */
561         bj[offd_so_far++] = mat_j[col];
562         onz++;
563       }
564     }
565     ailen[j] = dnz;
566     bilen[j] = onz;
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 /*
572     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
573     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
574     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
575     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
576     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
577 */
578 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
579 {
580   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
581   Mat            A      = aij->A; /* diagonal part of the matrix */
582   Mat            B      = aij->B; /* offdiagonal part of the matrix */
583   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
584   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
585   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
586   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
587   PetscInt       *ailen = a->ilen,*aj = a->j;
588   PetscInt       *bilen = b->ilen,*bj = b->j;
589   PetscInt       am     = aij->A->rmap->n,j;
590   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
591   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
592   PetscScalar    *aa = a->a,*ba = b->a;
593 
594   PetscFunctionBegin;
595   /* Iterate over all rows of the matrix */
596   for (j=0; j<am; j++) {
597     dnz_row = onz_row = 0;
598     rowstart_offd = full_offd_i[j];
599     rowstart_diag = full_diag_i[j];
600     /*  Iterate over all non-zero columns of the current row */
601     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
602       /* If column is in the diagonal */
603       if (mat_j[col] >= cstart && mat_j[col] < cend) {
604         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
605         aa[rowstart_diag+dnz_row] = mat_a[col];
606         dnz_row++;
607       } else { /* off-diagonal entries */
608         bj[rowstart_offd+onz_row] = mat_j[col];
609         ba[rowstart_offd+onz_row] = mat_a[col];
610         onz_row++;
611       }
612     }
613     ailen[j] = dnz_row;
614     bilen[j] = onz_row;
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
620 {
621   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
622   PetscErrorCode ierr;
623   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
624   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
625 
626   PetscFunctionBegin;
627   for (i=0; i<m; i++) {
628     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
629     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
630     if (idxm[i] >= rstart && idxm[i] < rend) {
631       row = idxm[i] - rstart;
632       for (j=0; j<n; j++) {
633         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
634         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
635         if (idxn[j] >= cstart && idxn[j] < cend) {
636           col  = idxn[j] - cstart;
637           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
638         } else {
639           if (!aij->colmap) {
640             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
641           }
642 #if defined(PETSC_USE_CTABLE)
643           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
644           col--;
645 #else
646           col = aij->colmap[idxn[j]] - 1;
647 #endif
648           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
649           else {
650             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
651           }
652         }
653       }
654     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
655   }
656   PetscFunctionReturn(0);
657 }
658 
659 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   PetscErrorCode ierr;
663   PetscInt       nstash,reallocs;
664 
665   PetscFunctionBegin;
666   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
667 
668   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
669   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
670   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
/*
   MatAssemblyEnd_MPIAIJ - Completes the assembly of an MPIAIJ matrix.

   Input Parameters:
+  mat  - the MPIAIJ matrix
-  mode - assembly type, forwarded to the assembly of both blocks

   Notes:
   The routine performs, in order:
   1. Unless stashing is disabled, receives the stashed entries and inserts them with
      MatSetValues_MPIAIJ() using mat->insertmode, one run of equal row indices per call.
   2. Assembles the diagonal block.
   3. If the off-diagonal block accepts new nonzeros, combines was_assembled over the communicator and
      disassembles this process's matrix when it was assembled but some other process was not.
   4. On the first final assembly, calls MatSetUpMultiply_MPIAIJ().
   5. Turns off inodes for the off-diagonal block and assembles it.
   6. Frees the cached row work arrays and the cached diagonal vector.
   7. Recomputes mat->nonzerostate as the sum over all processes of the two blocks' states, either on
      the first final assembly or whenever the diagonal block accepts new nonzeros.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash one message at a time until no more messages are flagged */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* the product of the flags is nonzero only if every process was still assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* release the cached row work arrays */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  /* release the cached diagonal vector */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
758 
759 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
760 {
761   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
762   PetscErrorCode ierr;
763 
764   PetscFunctionBegin;
765   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
766   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
770 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
771 {
772   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
773   PetscObjectState sA, sB;
774   PetscInt        *lrows;
775   PetscInt         r, len;
776   PetscBool        cong, lch, gch;
777   PetscErrorCode   ierr;
778 
779   PetscFunctionBegin;
780   /* get locally owned rows */
781   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
782   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
783   /* fix right hand side if needed */
784   if (x && b) {
785     const PetscScalar *xx;
786     PetscScalar       *bb;
787 
788     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
789     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
791     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
792     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
793     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
794   }
795 
796   sA = mat->A->nonzerostate;
797   sB = mat->B->nonzerostate;
798 
799   if (diag != 0.0 && cong) {
800     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
801     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
802   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
803     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
804     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
805     PetscInt   nnwA, nnwB;
806     PetscBool  nnzA, nnzB;
807 
808     nnwA = aijA->nonew;
809     nnwB = aijB->nonew;
810     nnzA = aijA->keepnonzeropattern;
811     nnzB = aijB->keepnonzeropattern;
812     if (!nnzA) {
813       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
814       aijA->nonew = 0;
815     }
816     if (!nnzB) {
817       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
818       aijB->nonew = 0;
819     }
820     /* Must zero here before the next loop */
821     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
822     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
823     for (r = 0; r < len; ++r) {
824       const PetscInt row = lrows[r] + A->rmap->rstart;
825       if (row >= A->cmap->N) continue;
826       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
827     }
828     aijA->nonew = nnwA;
829     aijB->nonew = nnwB;
830   } else {
831     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
832     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
833   }
834   ierr = PetscFree(lrows);CHKERRQ(ierr);
835   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
836   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837 
838   /* reduce nonzerostate */
839   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
840   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
841   if (gch) A->nonzerostate++;
842   PetscFunctionReturn(0);
843 }
844 
845 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
846 {
847   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
848   PetscErrorCode    ierr;
849   PetscMPIInt       n = A->rmap->n;
850   PetscInt          i,j,r,m,len = 0;
851   PetscInt          *lrows,*owners = A->rmap->range;
852   PetscMPIInt       p = 0;
853   PetscSFNode       *rrows;
854   PetscSF           sf;
855   const PetscScalar *xx;
856   PetscScalar       *bb,*mask;
857   Vec               xmask,lmask;
858   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
859   const PetscInt    *aj, *ii,*ridx;
860   PetscScalar       *aa;
861 
862   PetscFunctionBegin;
863   /* Create SF where leaves are input rows and roots are owned rows */
864   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
865   for (r = 0; r < n; ++r) lrows[r] = -1;
866   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
867   for (r = 0; r < N; ++r) {
868     const PetscInt idx   = rows[r];
869     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
870     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
871       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
872     }
873     rrows[r].rank  = p;
874     rrows[r].index = rows[r] - owners[p];
875   }
876   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
877   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
878   /* Collect flags for rows to be zeroed */
879   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
880   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
881   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
882   /* Compress and put in row numbers */
883   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
884   /* zero diagonal part of matrix */
885   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
886   /* handle off diagonal part of matrix */
887   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
888   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
889   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
890   for (i=0; i<len; i++) bb[lrows[i]] = 1;
891   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
892   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
895   if (x && b) { /* this code is buggy when the row and column layout don't match */
896     PetscBool cong;
897 
898     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
899     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
900     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
903     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
904   }
905   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
906   /* remove zeroed rows of off diagonal matrix */
907   ii = aij->i;
908   for (i=0; i<len; i++) {
909     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
910   }
911   /* loop over all elements of off process part of matrix zeroing removed columns*/
912   if (aij->compressedrow.use) {
913     m    = aij->compressedrow.nrows;
914     ii   = aij->compressedrow.i;
915     ridx = aij->compressedrow.rindex;
916     for (i=0; i<m; i++) {
917       n  = ii[i+1] - ii[i];
918       aj = aij->j + ii[i];
919       aa = aij->a + ii[i];
920 
921       for (j=0; j<n; j++) {
922         if (PetscAbsScalar(mask[*aj])) {
923           if (b) bb[*ridx] -= *aa*xx[*aj];
924           *aa = 0.0;
925         }
926         aa++;
927         aj++;
928       }
929       ridx++;
930     }
931   } else { /* do not use compressed row format */
932     m = l->B->rmap->n;
933     for (i=0; i<m; i++) {
934       n  = ii[i+1] - ii[i];
935       aj = aij->j + ii[i];
936       aa = aij->a + ii[i];
937       for (j=0; j<n; j++) {
938         if (PetscAbsScalar(mask[*aj])) {
939           if (b) bb[i] -= *aa*xx[*aj];
940           *aa = 0.0;
941         }
942         aa++;
943         aj++;
944       }
945     }
946   }
947   if (x && b) {
948     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
949     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
950   }
951   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
952   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
953   ierr = PetscFree(lrows);CHKERRQ(ierr);
954 
955   /* only change matrix nonzero state if pattern was allowed to be changed */
956   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
957     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
958     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
959   }
960   PetscFunctionReturn(0);
961 }
962 
963 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
964 {
965   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
966   PetscErrorCode ierr;
967   PetscInt       nt;
968   VecScatter     Mvctx = a->Mvctx;
969 
970   PetscFunctionBegin;
971   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
972   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
973   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
975   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
991 {
992   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
993   PetscErrorCode ierr;
994   VecScatter     Mvctx = a->Mvctx;
995 
996   PetscFunctionBegin;
997   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
999   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1000   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   /* do nondiagonal part */
1011   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1012   /* do local part */
1013   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1014   /* add partial results together */
1015   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1021 {
1022   MPI_Comm       comm;
1023   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1024   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1025   IS             Me,Notme;
1026   PetscErrorCode ierr;
1027   PetscInt       M,N,first,last,*notme,i;
1028   PetscBool      lf;
1029   PetscMPIInt    size;
1030 
1031   PetscFunctionBegin;
1032   /* Easy test: symmetric diagonal block */
1033   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1034   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1035   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1036   if (!*f) PetscFunctionReturn(0);
1037   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1038   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1039   if (size == 1) PetscFunctionReturn(0);
1040 
1041   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1042   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1043   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1044   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1045   for (i=0; i<first; i++) notme[i] = i;
1046   for (i=last; i<M; i++) notme[i-last+first] = i;
1047   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1048   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1049   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1050   Aoff = Aoffs[0];
1051   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1052   Boff = Boffs[0];
1053   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1054   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1055   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1056   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1057   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1058   ierr = PetscFree(notme);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1063 {
1064   PetscErrorCode ierr;
1065 
1066   PetscFunctionBegin;
1067   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* do local part */
1080   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1081   /* add partial results together */
1082   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1083   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /*
1088   This only works correctly for square matrices where the subblock A->A is the
1089    diagonal block
1090 */
1091 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1092 {
1093   PetscErrorCode ierr;
1094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1095 
1096   PetscFunctionBegin;
1097   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1098   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1099   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1110   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1115 {
1116   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1117   PetscErrorCode ierr;
1118 
1119   PetscFunctionBegin;
1120 #if defined(PETSC_USE_LOG)
1121   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1122 #endif
1123   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1124   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1125   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1126   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1127 #if defined(PETSC_USE_CTABLE)
1128   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1129 #else
1130   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1131 #endif
1132   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1133   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1134   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1135   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1136   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1137   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1138 
1139   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1140   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1141 
1142   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1152 #if defined(PETSC_HAVE_CUDA)
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1154 #endif
1155 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1157 #endif
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1159 #if defined(PETSC_HAVE_ELEMENTAL)
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1161 #endif
1162 #if defined(PETSC_HAVE_SCALAPACK)
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1164 #endif
1165 #if defined(PETSC_HAVE_HYPRE)
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1168 #endif
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1175 #if defined(PETSC_HAVE_MKL_SPARSE)
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1177 #endif
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1185 {
1186   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1187   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1188   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1189   const PetscInt    *garray = aij->garray;
1190   const PetscScalar *aa,*ba;
1191   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1192   PetscInt          *rowlens;
1193   PetscInt          *colidxs;
1194   PetscScalar       *matvals;
1195   PetscErrorCode    ierr;
1196 
1197   PetscFunctionBegin;
1198   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1199 
1200   M  = mat->rmap->N;
1201   N  = mat->cmap->N;
1202   m  = mat->rmap->n;
1203   rs = mat->rmap->rstart;
1204   cs = mat->cmap->rstart;
1205   nz = A->nz + B->nz;
1206 
1207   /* write matrix header */
1208   header[0] = MAT_FILE_CLASSID;
1209   header[1] = M; header[2] = N; header[3] = nz;
1210   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1211   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1212 
1213   /* fill in and store row lengths  */
1214   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1215   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1216   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1217   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1218 
1219   /* fill in and store column indices */
1220   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1221   for (cnt=0, i=0; i<m; i++) {
1222     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1223       if (garray[B->j[jb]] > cs) break;
1224       colidxs[cnt++] = garray[B->j[jb]];
1225     }
1226     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1227       colidxs[cnt++] = A->j[ja] + cs;
1228     for (; jb<B->i[i+1]; jb++)
1229       colidxs[cnt++] = garray[B->j[jb]];
1230   }
1231   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1232   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1233   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1234 
1235   /* fill in and store nonzero values */
1236   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1237   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1238   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1239   for (cnt=0, i=0; i<m; i++) {
1240     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1241       if (garray[B->j[jb]] > cs) break;
1242       matvals[cnt++] = ba[jb];
1243     }
1244     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1245       matvals[cnt++] = aa[ja];
1246     for (; jb<B->i[i+1]; jb++)
1247       matvals[cnt++] = ba[jb];
1248   }
1249   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1250   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1251   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1252   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1253   ierr = PetscFree(matvals);CHKERRQ(ierr);
1254 
1255   /* write block size option to the viewer's .info file */
1256   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include <petscdraw.h>
1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode    ierr;
1265   PetscMPIInt       rank = aij->rank,size = aij->size;
1266   PetscBool         isdraw,iascii,isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii) {
1275     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1278       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1279       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1280       for (i=0; i<(PetscInt)size; i++) {
1281         nmax = PetscMax(nmax,nz[i]);
1282         nmin = PetscMin(nmin,nz[i]);
1283         navg += nz[i];
1284       }
1285       ierr = PetscFree(nz);CHKERRQ(ierr);
1286       navg = navg/size;
1287       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1288       PetscFunctionReturn(0);
1289     }
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes=NULL;
1294 
1295       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1296       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1297       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1298       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1299       if (!inodes) {
1300         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1301                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1302       } else {
1303         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1304                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1305       }
1306       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1308       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1309       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1310       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1312       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1313       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1314       PetscFunctionReturn(0);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount,inodelimit,*inodes;
1317       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1318       if (inodes) {
1319         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1322       }
1323       PetscFunctionReturn(0);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(0);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1330       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1331     } else {
1332       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1333     }
1334     PetscFunctionReturn(0);
1335   } else if (iascii && size == 1) {
1336     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1337     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1338     PetscFunctionReturn(0);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1343     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1344     if (isnull) PetscFunctionReturn(0);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow,iscol;
1350 
1351     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1352     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1353     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1354     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1355 /*  The commented code uses MatCreateSubMatrices instead */
1356 /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1361     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1362     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1363     if (!rank) {
1364        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1369 */
1370     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1371     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1372     /*
1373        Everyone has to call to draw the matrix since the graphics waits are
1374        synchronized across all processors that share the PetscDraw object
1375     */
1376     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1377     if (!rank) {
1378       if (((PetscObject)mat)->name) {
1379         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1380       }
1381       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1382     }
1383     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1384     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1385     ierr = MatDestroy(&A);CHKERRQ(ierr);
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1391 {
1392   PetscErrorCode ierr;
1393   PetscBool      iascii,isdraw,issocket,isbinary;
1394 
1395   PetscFunctionBegin;
1396   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1397   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1400   if (iascii || isdraw || isbinary || issocket) {
1401     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1407 {
1408   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1409   PetscErrorCode ierr;
1410   Vec            bb1 = NULL;
1411   PetscBool      hasop;
1412 
1413   PetscFunctionBegin;
1414   if (flag == SOR_APPLY_UPPER) {
1415     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1416     PetscFunctionReturn(0);
1417   }
1418 
1419   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1420     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1421   }
1422 
1423   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1424     if (flag & SOR_ZERO_INITIAL_GUESS) {
1425       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1426       its--;
1427     }
1428 
1429     while (its--) {
1430       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1431       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1432 
1433       /* update rhs: bb1 = bb - B*x */
1434       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1435       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1436 
1437       /* local sweep */
1438       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1439     }
1440   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1441     if (flag & SOR_ZERO_INITIAL_GUESS) {
1442       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1443       its--;
1444     }
1445     while (its--) {
1446       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1447       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448 
1449       /* update rhs: bb1 = bb - B*x */
1450       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1451       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1452 
1453       /* local sweep */
1454       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1455     }
1456   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1457     if (flag & SOR_ZERO_INITIAL_GUESS) {
1458       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1459       its--;
1460     }
1461     while (its--) {
1462       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464 
1465       /* update rhs: bb1 = bb - B*x */
1466       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1467       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1468 
1469       /* local sweep */
1470       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1471     }
1472   } else if (flag & SOR_EISENSTAT) {
1473     Vec xx1;
1474 
1475     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1476     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1477 
1478     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480     if (!mat->diag) {
1481       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1482       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1483     }
1484     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1485     if (hasop) {
1486       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1487     } else {
1488       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1489     }
1490     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1491 
1492     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1493 
1494     /* local sweep */
1495     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1496     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1497     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1498   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1499 
1500   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1501 
1502   matin->factorerrortype = mat->A->factorerrortype;
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1507 {
1508   Mat            aA,aB,Aperm;
1509   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1510   PetscScalar    *aa,*ba;
1511   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1512   PetscSF        rowsf,sf;
1513   IS             parcolp = NULL;
1514   PetscBool      done;
1515   PetscErrorCode ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1519   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1520   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1521   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1522 
1523   /* Invert row permutation to find out where my rows should go */
1524   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1525   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1526   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1527   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1528   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1529   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1530 
1531   /* Invert column permutation to find out where my columns should go */
1532   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1533   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1534   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1535   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1536   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1537   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1538   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1539 
1540   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1541   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1542   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1543 
1544   /* Find out where my gcols should go */
1545   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1546   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1547   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1548   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1549   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1550   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1551   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1552   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1553 
1554   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1555   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1556   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1557   for (i=0; i<m; i++) {
1558     PetscInt    row = rdest[i];
1559     PetscMPIInt rowner;
1560     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1561     for (j=ai[i]; j<ai[i+1]; j++) {
1562       PetscInt    col = cdest[aj[j]];
1563       PetscMPIInt cowner;
1564       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1565       if (rowner == cowner) dnnz[i]++;
1566       else onnz[i]++;
1567     }
1568     for (j=bi[i]; j<bi[i+1]; j++) {
1569       PetscInt    col = gcdest[bj[j]];
1570       PetscMPIInt cowner;
1571       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1572       if (rowner == cowner) dnnz[i]++;
1573       else onnz[i]++;
1574     }
1575   }
1576   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1578   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1579   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1580   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1581 
1582   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1583   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1584   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1585   for (i=0; i<m; i++) {
1586     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1587     PetscInt j0,rowlen;
1588     rowlen = ai[i+1] - ai[i];
1589     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1590       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1591       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1592     }
1593     rowlen = bi[i+1] - bi[i];
1594     for (j0=j=0; j<rowlen; j0=j) {
1595       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1596       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1597     }
1598   }
1599   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1600   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1602   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1603   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1604   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1605   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1606   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1607   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1608   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1609   *B = Aperm;
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1614 {
1615   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1616   PetscErrorCode ierr;
1617 
1618   PetscFunctionBegin;
1619   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1620   if (ghosts) *ghosts = aij->garray;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1625 {
1626   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1627   Mat            A    = mat->A,B = mat->B;
1628   PetscErrorCode ierr;
1629   PetscLogDouble isend[5],irecv[5];
1630 
1631   PetscFunctionBegin;
1632   info->block_size = 1.0;
1633   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1634 
1635   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1636   isend[3] = info->memory;  isend[4] = info->mallocs;
1637 
1638   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1639 
1640   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1641   isend[3] += info->memory;  isend[4] += info->mallocs;
1642   if (flag == MAT_LOCAL) {
1643     info->nz_used      = isend[0];
1644     info->nz_allocated = isend[1];
1645     info->nz_unneeded  = isend[2];
1646     info->memory       = isend[3];
1647     info->mallocs      = isend[4];
1648   } else if (flag == MAT_GLOBAL_MAX) {
1649     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   } else if (flag == MAT_GLOBAL_SUM) {
1657     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   }
1665   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1666   info->fill_ratio_needed = 0;
1667   info->factor_mallocs    = 0;
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1672 {
1673   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   switch (op) {
1678   case MAT_NEW_NONZERO_LOCATIONS:
1679   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1680   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1681   case MAT_KEEP_NONZERO_PATTERN:
1682   case MAT_NEW_NONZERO_LOCATION_ERR:
1683   case MAT_USE_INODES:
1684   case MAT_IGNORE_ZERO_ENTRIES:
1685   case MAT_FORM_EXPLICIT_TRANSPOSE:
1686     MatCheckPreallocated(A,1);
1687     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1688     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1689     break;
1690   case MAT_ROW_ORIENTED:
1691     MatCheckPreallocated(A,1);
1692     a->roworiented = flg;
1693 
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1696     break;
1697   case MAT_FORCE_DIAGONAL_ENTRIES:
1698   case MAT_SORTED_FULL:
1699     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1700     break;
1701   case MAT_IGNORE_OFF_PROC_ENTRIES:
1702     a->donotstash = flg;
1703     break;
1704   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1705   case MAT_SPD:
1706   case MAT_SYMMETRIC:
1707   case MAT_STRUCTURALLY_SYMMETRIC:
1708   case MAT_HERMITIAN:
1709   case MAT_SYMMETRY_ETERNAL:
1710     break;
1711   case MAT_SUBMAT_SINGLEIS:
1712     A->submat_singleis = flg;
1713     break;
1714   case MAT_STRUCTURE_ONLY:
1715     /* The option is handled directly by MatSetOption() */
1716     break;
1717   default:
1718     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1724 {
1725   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1726   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1727   PetscErrorCode ierr;
1728   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1729   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1730   PetscInt       *cmap,*idx_p;
1731 
1732   PetscFunctionBegin;
1733   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1734   mat->getrowactive = PETSC_TRUE;
1735 
1736   if (!mat->rowvalues && (idx || v)) {
1737     /*
1738         allocate enough space to hold information from the longest row.
1739     */
1740     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1741     PetscInt   max = 1,tmp;
1742     for (i=0; i<matin->rmap->n; i++) {
1743       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1744       if (max < tmp) max = tmp;
1745     }
1746     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1747   }
1748 
1749   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1750   lrow = row - rstart;
1751 
1752   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1753   if (!v)   {pvA = NULL; pvB = NULL;}
1754   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1755   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1756   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1757   nztot = nzA + nzB;
1758 
1759   cmap = mat->garray;
1760   if (v  || idx) {
1761     if (nztot) {
1762       /* Sort by increasing column numbers, assuming A and B already sorted */
1763       PetscInt imark = -1;
1764       if (v) {
1765         *v = v_p = mat->rowvalues;
1766         for (i=0; i<nzB; i++) {
1767           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1768           else break;
1769         }
1770         imark = i;
1771         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1772         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1773       }
1774       if (idx) {
1775         *idx = idx_p = mat->rowindices;
1776         if (imark > -1) {
1777           for (i=0; i<imark; i++) {
1778             idx_p[i] = cmap[cworkB[i]];
1779           }
1780         } else {
1781           for (i=0; i<nzB; i++) {
1782             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1783             else break;
1784           }
1785           imark = i;
1786         }
1787         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1788         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1789       }
1790     } else {
1791       if (idx) *idx = NULL;
1792       if (v)   *v   = NULL;
1793     }
1794   }
1795   *nz  = nztot;
1796   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1797   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1802 {
1803   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1804 
1805   PetscFunctionBegin;
1806   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1807   aij->getrowactive = PETSC_FALSE;
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1812 {
1813   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1814   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1815   PetscErrorCode ierr;
1816   PetscInt       i,j,cstart = mat->cmap->rstart;
1817   PetscReal      sum = 0.0;
1818   MatScalar      *v;
1819 
1820   PetscFunctionBegin;
1821   if (aij->size == 1) {
1822     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1823   } else {
1824     if (type == NORM_FROBENIUS) {
1825       v = amat->a;
1826       for (i=0; i<amat->nz; i++) {
1827         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1828       }
1829       v = bmat->a;
1830       for (i=0; i<bmat->nz; i++) {
1831         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1832       }
1833       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1834       *norm = PetscSqrtReal(*norm);
1835       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1836     } else if (type == NORM_1) { /* max column norm */
1837       PetscReal *tmp,*tmp2;
1838       PetscInt  *jj,*garray = aij->garray;
1839       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1840       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1841       *norm = 0.0;
1842       v     = amat->a; jj = amat->j;
1843       for (j=0; j<amat->nz; j++) {
1844         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1845       }
1846       v = bmat->a; jj = bmat->j;
1847       for (j=0; j<bmat->nz; j++) {
1848         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1849       }
1850       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1851       for (j=0; j<mat->cmap->N; j++) {
1852         if (tmp2[j] > *norm) *norm = tmp2[j];
1853       }
1854       ierr = PetscFree(tmp);CHKERRQ(ierr);
1855       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1856       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1857     } else if (type == NORM_INFINITY) { /* max row norm */
1858       PetscReal ntemp = 0.0;
1859       for (j=0; j<aij->A->rmap->n; j++) {
1860         v   = amat->a + amat->i[j];
1861         sum = 0.0;
1862         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1863           sum += PetscAbsScalar(*v); v++;
1864         }
1865         v = bmat->a + bmat->i[j];
1866         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1867           sum += PetscAbsScalar(*v); v++;
1868         }
1869         if (sum > ntemp) ntemp = sum;
1870       }
1871       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1872       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1873     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1879 {
1880   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1881   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1882   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1883   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1884   PetscErrorCode  ierr;
1885   Mat             B,A_diag,*B_diag;
1886   const MatScalar *pbv,*bv;
1887 
1888   PetscFunctionBegin;
1889   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1890   ai = Aloc->i; aj = Aloc->j;
1891   bi = Bloc->i; bj = Bloc->j;
1892   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1893     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1894     PetscSFNode          *oloc;
1895     PETSC_UNUSED PetscSF sf;
1896 
1897     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1898     /* compute d_nnz for preallocation */
1899     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1900     for (i=0; i<ai[ma]; i++) {
1901       d_nnz[aj[i]]++;
1902     }
1903     /* compute local off-diagonal contributions */
1904     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1905     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1906     /* map those to global */
1907     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1908     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1909     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1910     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1911     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1912     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1913     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1914 
1915     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1916     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1917     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1918     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1919     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1920     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1921   } else {
1922     B    = *matout;
1923     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1924   }
1925 
1926   b           = (Mat_MPIAIJ*)B->data;
1927   A_diag      = a->A;
1928   B_diag      = &b->A;
1929   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1930   A_diag_ncol = A_diag->cmap->N;
1931   B_diag_ilen = sub_B_diag->ilen;
1932   B_diag_i    = sub_B_diag->i;
1933 
1934   /* Set ilen for diagonal of B */
1935   for (i=0; i<A_diag_ncol; i++) {
1936     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1937   }
1938 
1939   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
1940   very quickly (=without using MatSetValues), because all writes are local. */
1941   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1942 
1943   /* copy over the B part */
1944   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1945   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1946   pbv  = bv;
1947   row  = A->rmap->rstart;
1948   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1949   cols_tmp = cols;
1950   for (i=0; i<mb; i++) {
1951     ncol = bi[i+1]-bi[i];
1952     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1953     row++;
1954     pbv += ncol; cols_tmp += ncol;
1955   }
1956   ierr = PetscFree(cols);CHKERRQ(ierr);
1957   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1958 
1959   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1960   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1961   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1962     *matout = B;
1963   } else {
1964     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1970 {
1971   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1972   Mat            a    = aij->A,b = aij->B;
1973   PetscErrorCode ierr;
1974   PetscInt       s1,s2,s3;
1975 
1976   PetscFunctionBegin;
1977   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1978   if (rr) {
1979     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1980     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1981     /* Overlap communication with computation. */
1982     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1983   }
1984   if (ll) {
1985     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1986     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1987     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1988   }
1989   /* scale  the diagonal block */
1990   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1991 
1992   if (rr) {
1993     /* Do a scatter end and then right scale the off-diagonal block */
1994     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1995     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1996   }
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2001 {
2002   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2011 {
2012   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2013   Mat            a,b,c,d;
2014   PetscBool      flg;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   a = matA->A; b = matA->B;
2019   c = matB->A; d = matB->B;
2020 
2021   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2022   if (flg) {
2023     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2024   }
2025   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2030 {
2031   PetscErrorCode ierr;
2032   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2033   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2034 
2035   PetscFunctionBegin;
2036   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2037   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2038     /* because of the column compression in the off-processor part of the matrix a->B,
2039        the number of columns in a->B and b->B may be different, hence we cannot call
2040        the MatCopy() directly on the two parts. If need be, we can provide a more
2041        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2042        then copying the submatrices */
2043     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2044   } else {
2045     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2046     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2047   }
2048   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2053 {
2054   PetscErrorCode ierr;
2055 
2056   PetscFunctionBegin;
2057   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 /*
2062    Computes the number of nonzeros per row needed for preallocation when X and Y
2063    have different nonzero structure.
2064 */
2065 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2066 {
2067   PetscInt       i,j,k,nzx,nzy;
2068 
2069   PetscFunctionBegin;
2070   /* Set the number of nonzeros in the new matrix */
2071   for (i=0; i<m; i++) {
2072     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2073     nzx = xi[i+1] - xi[i];
2074     nzy = yi[i+1] - yi[i];
2075     nnz[i] = 0;
2076     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2077       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2078       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2079       nnz[i]++;
2080     }
2081     for (; k<nzy; k++) nnz[i]++;
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2087 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2088 {
2089   PetscErrorCode ierr;
2090   PetscInt       m = Y->rmap->N;
2091   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2092   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2100 {
2101   PetscErrorCode ierr;
2102   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2103 
2104   PetscFunctionBegin;
2105   if (str == SAME_NONZERO_PATTERN) {
2106     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2107     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2108   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2109     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2110   } else {
2111     Mat      B;
2112     PetscInt *nnz_d,*nnz_o;
2113 
2114     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2115     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2116     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2117     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2118     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2119     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2120     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2121     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2123     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2124     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2125     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2126     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2132 
2133 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2134 {
2135 #if defined(PETSC_USE_COMPLEX)
2136   PetscErrorCode ierr;
2137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2141   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2142 #else
2143   PetscFunctionBegin;
2144 #endif
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2155   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2160 {
2161   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2166   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2173   PetscErrorCode    ierr;
2174   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2175   PetscScalar       *va,*vv;
2176   Vec               vB,vA;
2177   const PetscScalar *vb;
2178 
2179   PetscFunctionBegin;
2180   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2181   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2182 
2183   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2184   if (idx) {
2185     for (i=0; i<m; i++) {
2186       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2187     }
2188   }
2189 
2190   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2191   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2192   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2193 
2194   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2195   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2196   for (i=0; i<m; i++) {
2197     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2198       vv[i] = vb[i];
2199       if (idx) idx[i] = a->garray[idxb[i]];
2200     } else {
2201       vv[i] = va[i];
2202       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2203         idx[i] = a->garray[idxb[i]];
2204     }
2205   }
2206   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2207   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2208   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2209   ierr = PetscFree(idxb);CHKERRQ(ierr);
2210   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2211   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2216 {
2217   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2218   PetscInt          m = A->rmap->n,n = A->cmap->n;
2219   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2220   PetscInt          *cmap  = mat->garray;
2221   PetscInt          *diagIdx, *offdiagIdx;
2222   Vec               diagV, offdiagV;
2223   PetscScalar       *a, *diagA, *offdiagA;
2224   const PetscScalar *ba,*bav;
2225   PetscInt          r,j,col,ncols,*bi,*bj;
2226   PetscErrorCode    ierr;
2227   Mat               B = mat->B;
2228   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2229 
2230   PetscFunctionBegin;
2231   /* When a process holds entire A and other processes have no entry */
2232   if (A->cmap->N == n) {
2233     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2234     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2235     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2236     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2237     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2238     PetscFunctionReturn(0);
2239   } else if (n == 0) {
2240     if (m) {
2241       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2242       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2243       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2244     }
2245     PetscFunctionReturn(0);
2246   }
2247 
2248   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2249   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2250   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2251   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2252 
2253   /* Get offdiagIdx[] for implicit 0.0 */
2254   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2255   ba   = bav;
2256   bi   = b->i;
2257   bj   = b->j;
2258   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2259   for (r = 0; r < m; r++) {
2260     ncols = bi[r+1] - bi[r];
2261     if (ncols == A->cmap->N - n) { /* Brow is dense */
2262       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2263     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2264       offdiagA[r] = 0.0;
2265 
2266       /* Find first hole in the cmap */
2267       for (j=0; j<ncols; j++) {
2268         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2269         if (col > j && j < cstart) {
2270           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2271           break;
2272         } else if (col > j + n && j >= cstart) {
2273           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2274           break;
2275         }
2276       }
2277       if (j == ncols && ncols < A->cmap->N - n) {
2278         /* a hole is outside compressed Bcols */
2279         if (ncols == 0) {
2280           if (cstart) {
2281             offdiagIdx[r] = 0;
2282           } else offdiagIdx[r] = cend;
2283         } else { /* ncols > 0 */
2284           offdiagIdx[r] = cmap[ncols-1] + 1;
2285           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2286         }
2287       }
2288     }
2289 
2290     for (j=0; j<ncols; j++) {
2291       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2292       ba++; bj++;
2293     }
2294   }
2295 
2296   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2297   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2298   for (r = 0; r < m; ++r) {
2299     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2300       a[r]   = diagA[r];
2301       if (idx) idx[r] = cstart + diagIdx[r];
2302     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2303       a[r] = diagA[r];
2304       if (idx) {
2305         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2306           idx[r] = cstart + diagIdx[r];
2307         } else idx[r] = offdiagIdx[r];
2308       }
2309     } else {
2310       a[r]   = offdiagA[r];
2311       if (idx) idx[r] = offdiagIdx[r];
2312     }
2313   }
2314   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2315   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2316   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2317   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2318   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2319   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2320   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2325 {
2326   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2327   PetscInt          m = A->rmap->n,n = A->cmap->n;
2328   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2329   PetscInt          *cmap  = mat->garray;
2330   PetscInt          *diagIdx, *offdiagIdx;
2331   Vec               diagV, offdiagV;
2332   PetscScalar       *a, *diagA, *offdiagA;
2333   const PetscScalar *ba,*bav;
2334   PetscInt          r,j,col,ncols,*bi,*bj;
2335   PetscErrorCode    ierr;
2336   Mat               B = mat->B;
2337   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2338 
2339   PetscFunctionBegin;
2340   /* When a process holds entire A and other processes have no entry */
2341   if (A->cmap->N == n) {
2342     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2343     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2344     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2345     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2346     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2347     PetscFunctionReturn(0);
2348   } else if (n == 0) {
2349     if (m) {
2350       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2351       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2352       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2353     }
2354     PetscFunctionReturn(0);
2355   }
2356 
2357   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2358   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2360   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2361 
2362   /* Get offdiagIdx[] for implicit 0.0 */
2363   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2364   ba   = bav;
2365   bi   = b->i;
2366   bj   = b->j;
2367   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2368   for (r = 0; r < m; r++) {
2369     ncols = bi[r+1] - bi[r];
2370     if (ncols == A->cmap->N - n) { /* Brow is dense */
2371       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2372     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2373       offdiagA[r] = 0.0;
2374 
2375       /* Find first hole in the cmap */
2376       for (j=0; j<ncols; j++) {
2377         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2378         if (col > j && j < cstart) {
2379           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2380           break;
2381         } else if (col > j + n && j >= cstart) {
2382           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2383           break;
2384         }
2385       }
2386       if (j == ncols && ncols < A->cmap->N - n) {
2387         /* a hole is outside compressed Bcols */
2388         if (ncols == 0) {
2389           if (cstart) {
2390             offdiagIdx[r] = 0;
2391           } else offdiagIdx[r] = cend;
2392         } else { /* ncols > 0 */
2393           offdiagIdx[r] = cmap[ncols-1] + 1;
2394           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2395         }
2396       }
2397     }
2398 
2399     for (j=0; j<ncols; j++) {
2400       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2401       ba++; bj++;
2402     }
2403   }
2404 
2405   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2406   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2407   for (r = 0; r < m; ++r) {
2408     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       if (idx) idx[r] = cstart + diagIdx[r];
2411     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2412       a[r] = diagA[r];
2413       if (idx) {
2414         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2415           idx[r] = cstart + diagIdx[r];
2416         } else idx[r] = offdiagIdx[r];
2417       }
2418     } else {
2419       a[r]   = offdiagA[r];
2420       if (idx) idx[r] = offdiagIdx[r];
2421     }
2422   }
2423   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2424   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2425   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2426   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2427   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2428   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2429   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2434 {
2435   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2436   PetscInt          m = A->rmap->n,n = A->cmap->n;
2437   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2438   PetscInt          *cmap  = mat->garray;
2439   PetscInt          *diagIdx, *offdiagIdx;
2440   Vec               diagV, offdiagV;
2441   PetscScalar       *a, *diagA, *offdiagA;
2442   const PetscScalar *ba,*bav;
2443   PetscInt          r,j,col,ncols,*bi,*bj;
2444   PetscErrorCode    ierr;
2445   Mat               B = mat->B;
2446   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2447 
2448   PetscFunctionBegin;
2449   /* When a process holds entire A and other processes have no entry */
2450   if (A->cmap->N == n) {
2451     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2452     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2453     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2454     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2456     PetscFunctionReturn(0);
2457   } else if (n == 0) {
2458     if (m) {
2459       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2460       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2461       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2462     }
2463     PetscFunctionReturn(0);
2464   }
2465 
2466   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2467   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2468   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2469   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2470 
2471   /* Get offdiagIdx[] for implicit 0.0 */
2472   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2473   ba   = bav;
2474   bi   = b->i;
2475   bj   = b->j;
2476   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2477   for (r = 0; r < m; r++) {
2478     ncols = bi[r+1] - bi[r];
2479     if (ncols == A->cmap->N - n) { /* Brow is dense */
2480       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2481     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2482       offdiagA[r] = 0.0;
2483 
2484       /* Find first hole in the cmap */
2485       for (j=0; j<ncols; j++) {
2486         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2487         if (col > j && j < cstart) {
2488           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2489           break;
2490         } else if (col > j + n && j >= cstart) {
2491           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2492           break;
2493         }
2494       }
2495       if (j == ncols && ncols < A->cmap->N - n) {
2496         /* a hole is outside compressed Bcols */
2497         if (ncols == 0) {
2498           if (cstart) {
2499             offdiagIdx[r] = 0;
2500           } else offdiagIdx[r] = cend;
2501         } else { /* ncols > 0 */
2502           offdiagIdx[r] = cmap[ncols-1] + 1;
2503           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2504         }
2505       }
2506     }
2507 
2508     for (j=0; j<ncols; j++) {
2509       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2510       ba++; bj++;
2511     }
2512   }
2513 
2514   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2515   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2516   for (r = 0; r < m; ++r) {
2517     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2518       a[r] = diagA[r];
2519       if (idx) idx[r] = cstart + diagIdx[r];
2520     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2521       a[r] = diagA[r];
2522       if (idx) {
2523         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2524           idx[r] = cstart + diagIdx[r];
2525         } else idx[r] = offdiagIdx[r];
2526       }
2527     } else {
2528       a[r] = offdiagA[r];
2529       if (idx) idx[r] = offdiagIdx[r];
2530     }
2531   }
2532   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2533   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2534   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2535   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2536   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2537   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2538   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2543 {
2544   PetscErrorCode ierr;
2545   Mat            *dummy;
2546 
2547   PetscFunctionBegin;
2548   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2549   *newmat = *dummy;
2550   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2555 {
2556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2557   PetscErrorCode ierr;
2558 
2559   PetscFunctionBegin;
2560   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2561   A->factorerrortype = a->A->factorerrortype;
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2566 {
2567   PetscErrorCode ierr;
2568   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2569 
2570   PetscFunctionBegin;
2571   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2572   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2573   if (x->assembled) {
2574     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2575   } else {
2576     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2577   }
2578   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2579   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2584 {
2585   PetscFunctionBegin;
2586   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2587   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 /*@
2592    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2593 
2594    Collective on Mat
2595 
2596    Input Parameters:
2597 +    A - the matrix
2598 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2599 
2600  Level: advanced
2601 
2602 @*/
2603 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2604 {
2605   PetscErrorCode       ierr;
2606 
2607   PetscFunctionBegin;
2608   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2613 {
2614   PetscErrorCode       ierr;
2615   PetscBool            sc = PETSC_FALSE,flg;
2616 
2617   PetscFunctionBegin;
2618   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2619   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2620   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2621   if (flg) {
2622     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2623   }
2624   ierr = PetscOptionsTail();CHKERRQ(ierr);
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2629 {
2630   PetscErrorCode ierr;
2631   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2632   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2633 
2634   PetscFunctionBegin;
2635   if (!Y->preallocated) {
2636     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2637   } else if (!aij->nz) {
2638     PetscInt nonew = aij->nonew;
2639     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2640     aij->nonew = nonew;
2641   }
2642   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2647 {
2648   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2653   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2654   if (d) {
2655     PetscInt rstart;
2656     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2657     *d += rstart;
2658 
2659   }
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2664 {
2665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2666   PetscErrorCode ierr;
2667 
2668   PetscFunctionBegin;
2669   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 /* -------------------------------------------------------------------*/
2674 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2675                                        MatGetRow_MPIAIJ,
2676                                        MatRestoreRow_MPIAIJ,
2677                                        MatMult_MPIAIJ,
2678                                 /* 4*/ MatMultAdd_MPIAIJ,
2679                                        MatMultTranspose_MPIAIJ,
2680                                        MatMultTransposeAdd_MPIAIJ,
2681                                        NULL,
2682                                        NULL,
2683                                        NULL,
2684                                 /*10*/ NULL,
2685                                        NULL,
2686                                        NULL,
2687                                        MatSOR_MPIAIJ,
2688                                        MatTranspose_MPIAIJ,
2689                                 /*15*/ MatGetInfo_MPIAIJ,
2690                                        MatEqual_MPIAIJ,
2691                                        MatGetDiagonal_MPIAIJ,
2692                                        MatDiagonalScale_MPIAIJ,
2693                                        MatNorm_MPIAIJ,
2694                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2695                                        MatAssemblyEnd_MPIAIJ,
2696                                        MatSetOption_MPIAIJ,
2697                                        MatZeroEntries_MPIAIJ,
2698                                 /*24*/ MatZeroRows_MPIAIJ,
2699                                        NULL,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                 /*29*/ MatSetUp_MPIAIJ,
2704                                        NULL,
2705                                        NULL,
2706                                        MatGetDiagonalBlock_MPIAIJ,
2707                                        NULL,
2708                                 /*34*/ MatDuplicate_MPIAIJ,
2709                                        NULL,
2710                                        NULL,
2711                                        NULL,
2712                                        NULL,
2713                                 /*39*/ MatAXPY_MPIAIJ,
2714                                        MatCreateSubMatrices_MPIAIJ,
2715                                        MatIncreaseOverlap_MPIAIJ,
2716                                        MatGetValues_MPIAIJ,
2717                                        MatCopy_MPIAIJ,
2718                                 /*44*/ MatGetRowMax_MPIAIJ,
2719                                        MatScale_MPIAIJ,
2720                                        MatShift_MPIAIJ,
2721                                        MatDiagonalSet_MPIAIJ,
2722                                        MatZeroRowsColumns_MPIAIJ,
2723                                 /*49*/ MatSetRandom_MPIAIJ,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2729                                        NULL,
2730                                        MatSetUnfactored_MPIAIJ,
2731                                        MatPermute_MPIAIJ,
2732                                        NULL,
2733                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2734                                        MatDestroy_MPIAIJ,
2735                                        MatView_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                 /*64*/ NULL,
2739                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2740                                        NULL,
2741                                        NULL,
2742                                        NULL,
2743                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2744                                        MatGetRowMinAbs_MPIAIJ,
2745                                        NULL,
2746                                        NULL,
2747                                        NULL,
2748                                        NULL,
2749                                 /*75*/ MatFDColoringApply_AIJ,
2750                                        MatSetFromOptions_MPIAIJ,
2751                                        NULL,
2752                                        NULL,
2753                                        MatFindZeroDiagonals_MPIAIJ,
2754                                 /*80*/ NULL,
2755                                        NULL,
2756                                        NULL,
2757                                 /*83*/ MatLoad_MPIAIJ,
2758                                        MatIsSymmetric_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                 /*89*/ NULL,
2764                                        NULL,
2765                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                        MatBindToCPU_MPIAIJ,
2773                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2774                                        NULL,
2775                                        NULL,
2776                                        MatConjugate_MPIAIJ,
2777                                        NULL,
2778                                 /*104*/MatSetValuesRow_MPIAIJ,
2779                                        MatRealPart_MPIAIJ,
2780                                        MatImaginaryPart_MPIAIJ,
2781                                        NULL,
2782                                        NULL,
2783                                 /*109*/NULL,
2784                                        NULL,
2785                                        MatGetRowMin_MPIAIJ,
2786                                        NULL,
2787                                        MatMissingDiagonal_MPIAIJ,
2788                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2789                                        NULL,
2790                                        MatGetGhosts_MPIAIJ,
2791                                        NULL,
2792                                        NULL,
2793                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        MatGetMultiProcBlock_MPIAIJ,
2798                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2799                                        MatGetColumnReductions_MPIAIJ,
2800                                        MatInvertBlockDiagonal_MPIAIJ,
2801                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2802                                        MatCreateSubMatricesMPI_MPIAIJ,
2803                                 /*129*/NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2807                                        NULL,
2808                                 /*134*/NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                 /*139*/MatSetBlockSizes_MPIAIJ,
2814                                        NULL,
2815                                        NULL,
2816                                        MatFDColoringSetUp_MPIXAIJ,
2817                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2818                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2819                                 /*145*/NULL,
2820                                        NULL,
2821                                        NULL
2822 };
2823 
2824 /* ----------------------------------------------------------------------------------------*/
2825 
2826 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2827 {
2828   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2829   PetscErrorCode ierr;
2830 
2831   PetscFunctionBegin;
2832   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2833   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2838 {
2839   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2844   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2849 {
2850   Mat_MPIAIJ     *b;
2851   PetscErrorCode ierr;
2852   PetscMPIInt    size;
2853 
2854   PetscFunctionBegin;
2855   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2856   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2857   b = (Mat_MPIAIJ*)B->data;
2858 
2859 #if defined(PETSC_USE_CTABLE)
2860   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2861 #else
2862   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2863 #endif
2864   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2865   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2866   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2867 
2868   /* Because the B will have been resized we simply destroy it and create a new one each time */
2869   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2870   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2871   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2872   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2873   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2874   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2875   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2876 
2877   if (!B->preallocated) {
2878     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2879     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2880     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2881     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2882     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2883   }
2884 
2885   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2886   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2887   B->preallocated  = PETSC_TRUE;
2888   B->was_assembled = PETSC_FALSE;
2889   B->assembled     = PETSC_FALSE;
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2894 {
2895   Mat_MPIAIJ     *b;
2896   PetscErrorCode ierr;
2897 
2898   PetscFunctionBegin;
2899   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2900   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2901   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2902   b = (Mat_MPIAIJ*)B->data;
2903 
2904 #if defined(PETSC_USE_CTABLE)
2905   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2906 #else
2907   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2908 #endif
2909   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2910   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2911   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2912 
2913   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2914   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2915   B->preallocated  = PETSC_TRUE;
2916   B->was_assembled = PETSC_FALSE;
2917   B->assembled = PETSC_FALSE;
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2922 {
2923   Mat            mat;
2924   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2925   PetscErrorCode ierr;
2926 
2927   PetscFunctionBegin;
2928   *newmat = NULL;
2929   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2930   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2931   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2932   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2933   a       = (Mat_MPIAIJ*)mat->data;
2934 
2935   mat->factortype   = matin->factortype;
2936   mat->assembled    = matin->assembled;
2937   mat->insertmode   = NOT_SET_VALUES;
2938   mat->preallocated = matin->preallocated;
2939 
2940   a->size         = oldmat->size;
2941   a->rank         = oldmat->rank;
2942   a->donotstash   = oldmat->donotstash;
2943   a->roworiented  = oldmat->roworiented;
2944   a->rowindices   = NULL;
2945   a->rowvalues    = NULL;
2946   a->getrowactive = PETSC_FALSE;
2947 
2948   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2949   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2950 
2951   if (oldmat->colmap) {
2952 #if defined(PETSC_USE_CTABLE)
2953     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2954 #else
2955     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2956     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2957     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2958 #endif
2959   } else a->colmap = NULL;
2960   if (oldmat->garray) {
2961     PetscInt len;
2962     len  = oldmat->B->cmap->n;
2963     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2964     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2965     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2966   } else a->garray = NULL;
2967 
2968   /* It may happen MatDuplicate is called with a non-assembled matrix
2969      In fact, MatDuplicate only requires the matrix to be preallocated
2970      This may happen inside a DMCreateMatrix_Shell */
2971   if (oldmat->lvec) {
2972     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2973     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2974   }
2975   if (oldmat->Mvctx) {
2976     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2977     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2978   }
2979   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2980   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2981   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2982   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2983   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2984   *newmat = mat;
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2989 {
2990   PetscBool      isbinary, ishdf5;
2991   PetscErrorCode ierr;
2992 
2993   PetscFunctionBegin;
2994   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2995   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2996   /* force binary viewer to load .info file if it has not yet done so */
2997   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2998   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2999   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3000   if (isbinary) {
3001     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3002   } else if (ishdf5) {
3003 #if defined(PETSC_HAVE_HDF5)
3004     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3005 #else
3006     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3007 #endif
3008   } else {
3009     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3010   }
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3015 {
3016   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3017   PetscInt       *rowidxs,*colidxs;
3018   PetscScalar    *matvals;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3023 
3024   /* read in matrix header */
3025   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3026   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3027   M  = header[1]; N = header[2]; nz = header[3];
3028   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3029   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3030   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3031 
3032   /* set block sizes from the viewer's .info file */
3033   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3034   /* set global sizes if not set already */
3035   if (mat->rmap->N < 0) mat->rmap->N = M;
3036   if (mat->cmap->N < 0) mat->cmap->N = N;
3037   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3038   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3039 
3040   /* check if the matrix sizes are correct */
3041   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3042   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3043 
3044   /* read in row lengths and build row indices */
3045   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3046   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3047   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3048   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3049   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3050   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3051   /* read in column indices and matrix values */
3052   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3053   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3054   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3055   /* store matrix indices and values */
3056   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3057   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3058   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 /* Not scalable because of ISAllGather() unless getting all columns. */
3063 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3064 {
3065   PetscErrorCode ierr;
3066   IS             iscol_local;
3067   PetscBool      isstride;
3068   PetscMPIInt    lisstride=0,gisstride;
3069 
3070   PetscFunctionBegin;
3071   /* check if we are grabbing all columns*/
3072   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3073 
3074   if (isstride) {
3075     PetscInt  start,len,mstart,mlen;
3076     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3077     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3078     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3079     if (mstart == start && mlen-mstart == len) lisstride = 1;
3080   }
3081 
3082   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3083   if (gisstride) {
3084     PetscInt N;
3085     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3086     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3087     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3088     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3089   } else {
3090     PetscInt cbs;
3091     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3092     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3093     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3094   }
3095 
3096   *isseq = iscol_local;
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 /*
3101  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3102  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3103 
3104  Input Parameters:
3105    mat - matrix
3106    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3107            i.e., mat->rstart <= isrow[i] < mat->rend
3108    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3109            i.e., mat->cstart <= iscol[i] < mat->cend
3110  Output Parameter:
3111    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3112    iscol_o - sequential column index set for retrieving mat->B
3113    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3114  */
3115 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3116 {
3117   PetscErrorCode ierr;
3118   Vec            x,cmap;
3119   const PetscInt *is_idx;
3120   PetscScalar    *xarray,*cmaparray;
3121   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3122   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3123   Mat            B=a->B;
3124   Vec            lvec=a->lvec,lcmap;
3125   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3126   MPI_Comm       comm;
3127   VecScatter     Mvctx=a->Mvctx;
3128 
3129   PetscFunctionBegin;
3130   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3131   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3132 
3133   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3134   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3135   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3136   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3137   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3138 
3139   /* Get start indices */
3140   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3141   isstart -= ncols;
3142   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3143 
3144   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3145   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3146   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3147   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3148   for (i=0; i<ncols; i++) {
3149     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3150     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3151     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3152   }
3153   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3154   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3155   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3156 
3157   /* Get iscol_d */
3158   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3159   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3160   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3161 
3162   /* Get isrow_d */
3163   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3164   rstart = mat->rmap->rstart;
3165   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3166   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3167   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3168   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3169 
3170   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3171   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3172   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3173 
3174   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3175   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3176   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3177 
3178   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3179 
3180   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3181   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3182 
3183   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3184   /* off-process column indices */
3185   count = 0;
3186   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3187   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3188 
3189   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3190   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3191   for (i=0; i<Bn; i++) {
3192     if (PetscRealPart(xarray[i]) > -1.0) {
3193       idx[count]     = i;                   /* local column index in off-diagonal part B */
3194       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3195       count++;
3196     }
3197   }
3198   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3199   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3200 
3201   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3202   /* cannot ensure iscol_o has same blocksize as iscol! */
3203 
3204   ierr = PetscFree(idx);CHKERRQ(ierr);
3205   *garray = cmap1;
3206 
3207   ierr = VecDestroy(&x);CHKERRQ(ierr);
3208   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3209   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3210   PetscFunctionReturn(0);
3211 }
3212 
3213 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3214 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3215 {
3216   PetscErrorCode ierr;
3217   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3218   Mat            M = NULL;
3219   MPI_Comm       comm;
3220   IS             iscol_d,isrow_d,iscol_o;
3221   Mat            Asub = NULL,Bsub = NULL;
3222   PetscInt       n;
3223 
3224   PetscFunctionBegin;
3225   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3226 
3227   if (call == MAT_REUSE_MATRIX) {
3228     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3229     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3230     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3231 
3232     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3233     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3234 
3235     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3236     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3237 
3238     /* Update diagonal and off-diagonal portions of submat */
3239     asub = (Mat_MPIAIJ*)(*submat)->data;
3240     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3241     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3242     if (n) {
3243       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3244     }
3245     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3246     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3247 
3248   } else { /* call == MAT_INITIAL_MATRIX) */
3249     const PetscInt *garray;
3250     PetscInt        BsubN;
3251 
3252     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3253     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3254 
3255     /* Create local submatrices Asub and Bsub */
3256     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3257     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3258 
3259     /* Create submatrix M */
3260     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3261 
3262     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3263     asub = (Mat_MPIAIJ*)M->data;
3264 
3265     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3266     n = asub->B->cmap->N;
3267     if (BsubN > n) {
3268       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3269       const PetscInt *idx;
3270       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3271       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3272 
3273       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3274       j = 0;
3275       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3276       for (i=0; i<n; i++) {
3277         if (j >= BsubN) break;
3278         while (subgarray[i] > garray[j]) j++;
3279 
3280         if (subgarray[i] == garray[j]) {
3281           idx_new[i] = idx[j++];
3282         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3283       }
3284       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3285 
3286       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3287       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3288 
3289     } else if (BsubN < n) {
3290       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3291     }
3292 
3293     ierr = PetscFree(garray);CHKERRQ(ierr);
3294     *submat = M;
3295 
3296     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3297     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3298     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3299 
3300     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3301     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3302 
3303     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3304     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3305   }
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3310 {
3311   PetscErrorCode ierr;
3312   IS             iscol_local=NULL,isrow_d;
3313   PetscInt       csize;
3314   PetscInt       n,i,j,start,end;
3315   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3316   MPI_Comm       comm;
3317 
3318   PetscFunctionBegin;
3319   /* If isrow has same processor distribution as mat,
3320      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3321   if (call == MAT_REUSE_MATRIX) {
3322     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3323     if (isrow_d) {
3324       sameRowDist  = PETSC_TRUE;
3325       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3326     } else {
3327       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3328       if (iscol_local) {
3329         sameRowDist  = PETSC_TRUE;
3330         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3331       }
3332     }
3333   } else {
3334     /* Check if isrow has same processor distribution as mat */
3335     sameDist[0] = PETSC_FALSE;
3336     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3337     if (!n) {
3338       sameDist[0] = PETSC_TRUE;
3339     } else {
3340       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3341       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3342       if (i >= start && j < end) {
3343         sameDist[0] = PETSC_TRUE;
3344       }
3345     }
3346 
3347     /* Check if iscol has same processor distribution as mat */
3348     sameDist[1] = PETSC_FALSE;
3349     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3350     if (!n) {
3351       sameDist[1] = PETSC_TRUE;
3352     } else {
3353       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3354       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3355       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3356     }
3357 
3358     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3359     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3360     sameRowDist = tsameDist[0];
3361   }
3362 
3363   if (sameRowDist) {
3364     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3365       /* isrow and iscol have same processor distribution as mat */
3366       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3367       PetscFunctionReturn(0);
3368     } else { /* sameRowDist */
3369       /* isrow has same processor distribution as mat */
3370       if (call == MAT_INITIAL_MATRIX) {
3371         PetscBool sorted;
3372         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3373         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3374         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3375         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3376 
3377         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3378         if (sorted) {
3379           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3380           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3381           PetscFunctionReturn(0);
3382         }
3383       } else { /* call == MAT_REUSE_MATRIX */
3384         IS iscol_sub;
3385         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3386         if (iscol_sub) {
3387           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3388           PetscFunctionReturn(0);
3389         }
3390       }
3391     }
3392   }
3393 
3394   /* General case: iscol -> iscol_local which has global size of iscol */
3395   if (call == MAT_REUSE_MATRIX) {
3396     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3397     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3398   } else {
3399     if (!iscol_local) {
3400       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3401     }
3402   }
3403 
3404   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3405   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3406 
3407   if (call == MAT_INITIAL_MATRIX) {
3408     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3409     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3410   }
3411   PetscFunctionReturn(0);
3412 }
3413 
3414 /*@C
3415      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3416          and "off-diagonal" part of the matrix in CSR format.
3417 
3418    Collective
3419 
3420    Input Parameters:
3421 +  comm - MPI communicator
3422 .  A - "diagonal" portion of matrix
3423 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3424 -  garray - global index of B columns
3425 
3426    Output Parameter:
3427 .   mat - the matrix, with input A as its local diagonal matrix
3428    Level: advanced
3429 
3430    Notes:
3431        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3432        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3433 
3434 .seealso: MatCreateMPIAIJWithSplitArrays()
3435 @*/
3436 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3437 {
3438   PetscErrorCode    ierr;
3439   Mat_MPIAIJ        *maij;
3440   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3441   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3442   const PetscScalar *oa;
3443   Mat               Bnew;
3444   PetscInt          m,n,N;
3445 
3446   PetscFunctionBegin;
3447   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3448   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3449   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3450   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3451   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3452   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3453 
3454   /* Get global columns of mat */
3455   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3456 
3457   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3458   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3459   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3460   maij = (Mat_MPIAIJ*)(*mat)->data;
3461 
3462   (*mat)->preallocated = PETSC_TRUE;
3463 
3464   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3465   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3466 
3467   /* Set A as diagonal portion of *mat */
3468   maij->A = A;
3469 
3470   nz = oi[m];
3471   for (i=0; i<nz; i++) {
3472     col   = oj[i];
3473     oj[i] = garray[col];
3474   }
3475 
3476   /* Set Bnew as off-diagonal portion of *mat */
3477   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3478   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3479   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3480   bnew        = (Mat_SeqAIJ*)Bnew->data;
3481   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3482   maij->B     = Bnew;
3483 
3484   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3485 
3486   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3487   b->free_a       = PETSC_FALSE;
3488   b->free_ij      = PETSC_FALSE;
3489   ierr = MatDestroy(&B);CHKERRQ(ierr);
3490 
3491   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3492   bnew->free_a       = PETSC_TRUE;
3493   bnew->free_ij      = PETSC_TRUE;
3494 
3495   /* condense columns of maij->B */
3496   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3497   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3498   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3499   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3500   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3501   PetscFunctionReturn(0);
3502 }
3503 
3504 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3505 
3506 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3507 {
3508   PetscErrorCode ierr;
3509   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3510   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3511   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3512   Mat            M,Msub,B=a->B;
3513   MatScalar      *aa;
3514   Mat_SeqAIJ     *aij;
3515   PetscInt       *garray = a->garray,*colsub,Ncols;
3516   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3517   IS             iscol_sub,iscmap;
3518   const PetscInt *is_idx,*cmap;
3519   PetscBool      allcolumns=PETSC_FALSE;
3520   MPI_Comm       comm;
3521 
3522   PetscFunctionBegin;
3523   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3524   if (call == MAT_REUSE_MATRIX) {
3525     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3526     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3527     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3528 
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3530     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3531 
3532     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3533     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3534 
3535     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3536 
3537   } else { /* call == MAT_INITIAL_MATRIX) */
3538     PetscBool flg;
3539 
3540     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3541     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3542 
3543     /* (1) iscol -> nonscalable iscol_local */
3544     /* Check for special case: each processor gets entire matrix columns */
3545     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3546     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3547     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3548     if (allcolumns) {
3549       iscol_sub = iscol_local;
3550       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3551       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3552 
3553     } else {
3554       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3555       PetscInt *idx,*cmap1,k;
3556       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3557       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3558       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3559       count = 0;
3560       k     = 0;
3561       for (i=0; i<Ncols; i++) {
3562         j = is_idx[i];
3563         if (j >= cstart && j < cend) {
3564           /* diagonal part of mat */
3565           idx[count]     = j;
3566           cmap1[count++] = i; /* column index in submat */
3567         } else if (Bn) {
3568           /* off-diagonal part of mat */
3569           if (j == garray[k]) {
3570             idx[count]     = j;
3571             cmap1[count++] = i;  /* column index in submat */
3572           } else if (j > garray[k]) {
3573             while (j > garray[k] && k < Bn-1) k++;
3574             if (j == garray[k]) {
3575               idx[count]     = j;
3576               cmap1[count++] = i; /* column index in submat */
3577             }
3578           }
3579         }
3580       }
3581       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3582 
3583       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3584       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3585       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3586 
3587       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3588     }
3589 
3590     /* (3) Create sequential Msub */
3591     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3592   }
3593 
3594   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3595   aij  = (Mat_SeqAIJ*)(Msub)->data;
3596   ii   = aij->i;
3597   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3598 
3599   /*
3600       m - number of local rows
3601       Ncols - number of columns (same on all processors)
3602       rstart - first row in new global matrix generated
3603   */
3604   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3605 
3606   if (call == MAT_INITIAL_MATRIX) {
3607     /* (4) Create parallel newmat */
3608     PetscMPIInt    rank,size;
3609     PetscInt       csize;
3610 
3611     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3612     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3613 
3614     /*
3615         Determine the number of non-zeros in the diagonal and off-diagonal
3616         portions of the matrix in order to do correct preallocation
3617     */
3618 
3619     /* first get start and end of "diagonal" columns */
3620     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3621     if (csize == PETSC_DECIDE) {
3622       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3623       if (mglobal == Ncols) { /* square matrix */
3624         nlocal = m;
3625       } else {
3626         nlocal = Ncols/size + ((Ncols % size) > rank);
3627       }
3628     } else {
3629       nlocal = csize;
3630     }
3631     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3632     rstart = rend - nlocal;
3633     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3634 
3635     /* next, compute all the lengths */
3636     jj    = aij->j;
3637     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3638     olens = dlens + m;
3639     for (i=0; i<m; i++) {
3640       jend = ii[i+1] - ii[i];
3641       olen = 0;
3642       dlen = 0;
3643       for (j=0; j<jend; j++) {
3644         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3645         else dlen++;
3646         jj++;
3647       }
3648       olens[i] = olen;
3649       dlens[i] = dlen;
3650     }
3651 
3652     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3653     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3654 
3655     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3656     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3657     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3658     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3659     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3660     ierr = PetscFree(dlens);CHKERRQ(ierr);
3661 
3662   } else { /* call == MAT_REUSE_MATRIX */
3663     M    = *newmat;
3664     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3665     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3666     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3667     /*
3668          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3669        rather than the slower MatSetValues().
3670     */
3671     M->was_assembled = PETSC_TRUE;
3672     M->assembled     = PETSC_FALSE;
3673   }
3674 
3675   /* (5) Set values of Msub to *newmat */
3676   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3677   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3678 
3679   jj   = aij->j;
3680   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3681   for (i=0; i<m; i++) {
3682     row = rstart + i;
3683     nz  = ii[i+1] - ii[i];
3684     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3685     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3686     jj += nz; aa += nz;
3687   }
3688   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3689   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3690 
3691   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693 
3694   ierr = PetscFree(colsub);CHKERRQ(ierr);
3695 
3696   /* save Msub, iscol_sub and iscmap used in processor for next request */
3697   if (call == MAT_INITIAL_MATRIX) {
3698     *newmat = M;
3699     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3700     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3701 
3702     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3703     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3704 
3705     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3706     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3707 
3708     if (iscol_local) {
3709       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3710       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3711     }
3712   }
3713   PetscFunctionReturn(0);
3714 }
3715 
3716 /*
3717     Not great since it makes two copies of the submatrix, first an SeqAIJ
3718   in local and then by concatenating the local matrices the end result.
3719   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3720 
3721   Note: This requires a sequential iscol with all indices.
3722 */
3723 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3724 {
3725   PetscErrorCode ierr;
3726   PetscMPIInt    rank,size;
3727   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3728   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3729   Mat            M,Mreuse;
3730   MatScalar      *aa,*vwork;
3731   MPI_Comm       comm;
3732   Mat_SeqAIJ     *aij;
3733   PetscBool      colflag,allcolumns=PETSC_FALSE;
3734 
3735   PetscFunctionBegin;
3736   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3737   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3738   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3739 
3740   /* Check for special case: each processor gets entire matrix columns */
3741   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3742   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3743   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3744   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3745 
3746   if (call ==  MAT_REUSE_MATRIX) {
3747     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3748     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3749     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3750   } else {
3751     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3752   }
3753 
3754   /*
3755       m - number of local rows
3756       n - number of columns (same on all processors)
3757       rstart - first row in new global matrix generated
3758   */
3759   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3760   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3761   if (call == MAT_INITIAL_MATRIX) {
3762     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3763     ii  = aij->i;
3764     jj  = aij->j;
3765 
3766     /*
3767         Determine the number of non-zeros in the diagonal and off-diagonal
3768         portions of the matrix in order to do correct preallocation
3769     */
3770 
3771     /* first get start and end of "diagonal" columns */
3772     if (csize == PETSC_DECIDE) {
3773       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3774       if (mglobal == n) { /* square matrix */
3775         nlocal = m;
3776       } else {
3777         nlocal = n/size + ((n % size) > rank);
3778       }
3779     } else {
3780       nlocal = csize;
3781     }
3782     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3783     rstart = rend - nlocal;
3784     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3785 
3786     /* next, compute all the lengths */
3787     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3788     olens = dlens + m;
3789     for (i=0; i<m; i++) {
3790       jend = ii[i+1] - ii[i];
3791       olen = 0;
3792       dlen = 0;
3793       for (j=0; j<jend; j++) {
3794         if (*jj < rstart || *jj >= rend) olen++;
3795         else dlen++;
3796         jj++;
3797       }
3798       olens[i] = olen;
3799       dlens[i] = dlen;
3800     }
3801     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3802     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3803     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3804     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3805     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3806     ierr = PetscFree(dlens);CHKERRQ(ierr);
3807   } else {
3808     PetscInt ml,nl;
3809 
3810     M    = *newmat;
3811     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3812     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3813     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3814     /*
3815          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3816        rather than the slower MatSetValues().
3817     */
3818     M->was_assembled = PETSC_TRUE;
3819     M->assembled     = PETSC_FALSE;
3820   }
3821   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3822   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3823   ii   = aij->i;
3824   jj   = aij->j;
3825 
3826   /* trigger copy to CPU if needed */
3827   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3828   for (i=0; i<m; i++) {
3829     row   = rstart + i;
3830     nz    = ii[i+1] - ii[i];
3831     cwork = jj; jj += nz;
3832     vwork = aa; aa += nz;
3833     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3834   }
3835   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3836 
3837   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3838   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3839   *newmat = M;
3840 
3841   /* save submatrix used in processor for next request */
3842   if (call ==  MAT_INITIAL_MATRIX) {
3843     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3844     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3845   }
3846   PetscFunctionReturn(0);
3847 }
3848 
3849 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3850 {
3851   PetscInt       m,cstart, cend,j,nnz,i,d;
3852   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3853   const PetscInt *JJ;
3854   PetscErrorCode ierr;
3855   PetscBool      nooffprocentries;
3856 
3857   PetscFunctionBegin;
3858   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3859 
3860   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3861   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3862   m      = B->rmap->n;
3863   cstart = B->cmap->rstart;
3864   cend   = B->cmap->rend;
3865   rstart = B->rmap->rstart;
3866 
3867   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3868 
3869   if (PetscDefined(USE_DEBUG)) {
3870     for (i=0; i<m; i++) {
3871       nnz = Ii[i+1]- Ii[i];
3872       JJ  = J + Ii[i];
3873       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3874       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3875       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3876     }
3877   }
3878 
3879   for (i=0; i<m; i++) {
3880     nnz     = Ii[i+1]- Ii[i];
3881     JJ      = J + Ii[i];
3882     nnz_max = PetscMax(nnz_max,nnz);
3883     d       = 0;
3884     for (j=0; j<nnz; j++) {
3885       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3886     }
3887     d_nnz[i] = d;
3888     o_nnz[i] = nnz - d;
3889   }
3890   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3891   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3892 
3893   for (i=0; i<m; i++) {
3894     ii   = i + rstart;
3895     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3896   }
3897   nooffprocentries    = B->nooffprocentries;
3898   B->nooffprocentries = PETSC_TRUE;
3899   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3900   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3901   B->nooffprocentries = nooffprocentries;
3902 
3903   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
3906 
3907 /*@
3908    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3909    (the default parallel PETSc format).
3910 
3911    Collective
3912 
3913    Input Parameters:
3914 +  B - the matrix
3915 .  i - the indices into j for the start of each local row (starts with zero)
3916 .  j - the column indices for each local row (starts with zero)
3917 -  v - optional values in the matrix
3918 
3919    Level: developer
3920 
3921    Notes:
3922        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3923      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3924      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3925 
3926        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3927 
3928        The format which is used for the sparse matrix input, is equivalent to a
3929     row-major ordering.. i.e for the following matrix, the input data expected is
3930     as shown
3931 
3932 $        1 0 0
3933 $        2 0 3     P0
3934 $       -------
3935 $        4 5 6     P1
3936 $
3937 $     Process0 [P0]: rows_owned=[0,1]
3938 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3939 $        j =  {0,0,2}  [size = 3]
3940 $        v =  {1,2,3}  [size = 3]
3941 $
3942 $     Process1 [P1]: rows_owned=[2]
3943 $        i =  {0,3}    [size = nrow+1  = 1+1]
3944 $        j =  {0,1,2}  [size = 3]
3945 $        v =  {4,5,6}  [size = 3]
3946 
3947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3948           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3949 @*/
3950 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3951 {
3952   PetscErrorCode ierr;
3953 
3954   PetscFunctionBegin;
3955   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3956   PetscFunctionReturn(0);
3957 }
3958 
3959 /*@C
3960    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3961    (the default parallel PETSc format).  For good matrix assembly performance
3962    the user should preallocate the matrix storage by setting the parameters
3963    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3964    performance can be increased by more than a factor of 50.
3965 
3966    Collective
3967 
3968    Input Parameters:
3969 +  B - the matrix
3970 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3971            (same value is used for all local rows)
3972 .  d_nnz - array containing the number of nonzeros in the various rows of the
3973            DIAGONAL portion of the local submatrix (possibly different for each row)
3974            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3975            The size of this array is equal to the number of local rows, i.e 'm'.
3976            For matrices that will be factored, you must leave room for (and set)
3977            the diagonal entry even if it is zero.
3978 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3979            submatrix (same value is used for all local rows).
3980 -  o_nnz - array containing the number of nonzeros in the various rows of the
3981            OFF-DIAGONAL portion of the local submatrix (possibly different for
3982            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3983            structure. The size of this array is equal to the number
3984            of local rows, i.e 'm'.
3985 
3986    If the *_nnz parameter is given then the *_nz parameter is ignored
3987 
3988    The AIJ format (also called the Yale sparse matrix format or
3989    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3990    storage.  The stored row and column indices begin with zero.
3991    See Users-Manual: ch_mat for details.
3992 
3993    The parallel matrix is partitioned such that the first m0 rows belong to
3994    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3995    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3996 
3997    The DIAGONAL portion of the local submatrix of a processor can be defined
3998    as the submatrix which is obtained by extraction the part corresponding to
3999    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4000    first row that belongs to the processor, r2 is the last row belonging to
4001    the this processor, and c1-c2 is range of indices of the local part of a
4002    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4003    common case of a square matrix, the row and column ranges are the same and
4004    the DIAGONAL part is also square. The remaining portion of the local
4005    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4006 
4007    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4008 
4009    You can call MatGetInfo() to get information on how effective the preallocation was;
4010    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4011    You can also run with the option -info and look for messages with the string
4012    malloc in them to see if additional memory allocation was needed.
4013 
4014    Example usage:
4015 
4016    Consider the following 8x8 matrix with 34 non-zero values, that is
4017    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4018    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4019    as follows:
4020 
4021 .vb
4022             1  2  0  |  0  3  0  |  0  4
4023     Proc0   0  5  6  |  7  0  0  |  8  0
4024             9  0 10  | 11  0  0  | 12  0
4025     -------------------------------------
4026            13  0 14  | 15 16 17  |  0  0
4027     Proc1   0 18  0  | 19 20 21  |  0  0
4028             0  0  0  | 22 23  0  | 24  0
4029     -------------------------------------
4030     Proc2  25 26 27  |  0  0 28  | 29  0
4031            30  0  0  | 31 32 33  |  0 34
4032 .ve
4033 
4034    This can be represented as a collection of submatrices as:
4035 
4036 .vb
4037       A B C
4038       D E F
4039       G H I
4040 .ve
4041 
4042    Where the submatrices A,B,C are owned by proc0, D,E,F are
4043    owned by proc1, G,H,I are owned by proc2.
4044 
4045    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4046    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4047    The 'M','N' parameters are 8,8, and have the same values on all procs.
4048 
4049    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4050    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4051    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4052    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4053    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4054    matrix, ans [DF] as another SeqAIJ matrix.
4055 
4056    When d_nz, o_nz parameters are specified, d_nz storage elements are
4057    allocated for every row of the local diagonal submatrix, and o_nz
4058    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4059    One way to choose d_nz and o_nz is to use the max nonzerors per local
4060    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4061    In this case, the values of d_nz,o_nz are:
4062 .vb
4063      proc0 : dnz = 2, o_nz = 2
4064      proc1 : dnz = 3, o_nz = 2
4065      proc2 : dnz = 1, o_nz = 4
4066 .ve
4067    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4068    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4069    for proc3. i.e we are using 12+15+10=37 storage locations to store
4070    34 values.
4071 
4072    When d_nnz, o_nnz parameters are specified, the storage is specified
4073    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4074    In the above case the values for d_nnz,o_nnz are:
4075 .vb
4076      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4077      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4078      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4079 .ve
4080    Here the space allocated is sum of all the above values i.e 34, and
4081    hence pre-allocation is perfect.
4082 
4083    Level: intermediate
4084 
4085 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4086           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4087 @*/
4088 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4089 {
4090   PetscErrorCode ierr;
4091 
4092   PetscFunctionBegin;
4093   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4094   PetscValidType(B,1);
4095   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098 
4099 /*@
4100      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4101          CSR format for the local rows.
4102 
4103    Collective
4104 
4105    Input Parameters:
4106 +  comm - MPI communicator
4107 .  m - number of local rows (Cannot be PETSC_DECIDE)
4108 .  n - This value should be the same as the local size used in creating the
4109        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4110        calculated if N is given) For square matrices n is almost always m.
4111 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4112 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4113 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4114 .   j - column indices
4115 -   a - matrix values
4116 
4117    Output Parameter:
4118 .   mat - the matrix
4119 
4120    Level: intermediate
4121 
4122    Notes:
4123        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4124      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4125      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4126 
4127        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4128 
4129        The format which is used for the sparse matrix input, is equivalent to a
4130     row-major ordering.. i.e for the following matrix, the input data expected is
4131     as shown
4132 
4133        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4134 
4135 $        1 0 0
4136 $        2 0 3     P0
4137 $       -------
4138 $        4 5 6     P1
4139 $
4140 $     Process0 [P0]: rows_owned=[0,1]
4141 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4142 $        j =  {0,0,2}  [size = 3]
4143 $        v =  {1,2,3}  [size = 3]
4144 $
4145 $     Process1 [P1]: rows_owned=[2]
4146 $        i =  {0,3}    [size = nrow+1  = 1+1]
4147 $        j =  {0,1,2}  [size = 3]
4148 $        v =  {4,5,6}  [size = 3]
4149 
4150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4151           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4152 @*/
4153 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4154 {
4155   PetscErrorCode ierr;
4156 
4157   PetscFunctionBegin;
4158   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4159   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4160   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4161   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4162   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4163   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4164   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 /*@
4169      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4170          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4171 
4172    Collective
4173 
4174    Input Parameters:
4175 +  mat - the matrix
4176 .  m - number of local rows (Cannot be PETSC_DECIDE)
4177 .  n - This value should be the same as the local size used in creating the
4178        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4179        calculated if N is given) For square matrices n is almost always m.
4180 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4181 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4182 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4183 .  J - column indices
4184 -  v - matrix values
4185 
4186    Level: intermediate
4187 
4188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4189           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4190 @*/
4191 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4192 {
4193   PetscErrorCode ierr;
4194   PetscInt       cstart,nnz,i,j;
4195   PetscInt       *ld;
4196   PetscBool      nooffprocentries;
4197   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4198   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4199   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4200   const PetscInt *Adi = Ad->i;
4201   PetscInt       ldi,Iii,md;
4202 
4203   PetscFunctionBegin;
4204   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4205   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4206   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4207   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4208 
4209   cstart = mat->cmap->rstart;
4210   if (!Aij->ld) {
4211     /* count number of entries below block diagonal */
4212     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4213     Aij->ld = ld;
4214     for (i=0; i<m; i++) {
4215       nnz  = Ii[i+1]- Ii[i];
4216       j     = 0;
4217       while  (J[j] < cstart && j < nnz) {j++;}
4218       J    += nnz;
4219       ld[i] = j;
4220     }
4221   } else {
4222     ld = Aij->ld;
4223   }
4224 
4225   for (i=0; i<m; i++) {
4226     nnz  = Ii[i+1]- Ii[i];
4227     Iii  = Ii[i];
4228     ldi  = ld[i];
4229     md   = Adi[i+1]-Adi[i];
4230     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4231     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4232     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4233     ad  += md;
4234     ao  += nnz - md;
4235   }
4236   nooffprocentries      = mat->nooffprocentries;
4237   mat->nooffprocentries = PETSC_TRUE;
4238   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4239   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4240   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4241   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4242   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4243   mat->nooffprocentries = nooffprocentries;
4244   PetscFunctionReturn(0);
4245 }
4246 
4247 /*@C
4248    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4249    (the default parallel PETSc format).  For good matrix assembly performance
4250    the user should preallocate the matrix storage by setting the parameters
4251    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4252    performance can be increased by more than a factor of 50.
4253 
4254    Collective
4255 
4256    Input Parameters:
4257 +  comm - MPI communicator
4258 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4259            This value should be the same as the local size used in creating the
4260            y vector for the matrix-vector product y = Ax.
4261 .  n - This value should be the same as the local size used in creating the
4262        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4263        calculated if N is given) For square matrices n is almost always m.
4264 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4265 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4266 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4267            (same value is used for all local rows)
4268 .  d_nnz - array containing the number of nonzeros in the various rows of the
4269            DIAGONAL portion of the local submatrix (possibly different for each row)
4270            or NULL, if d_nz is used to specify the nonzero structure.
4271            The size of this array is equal to the number of local rows, i.e 'm'.
4272 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4273            submatrix (same value is used for all local rows).
4274 -  o_nnz - array containing the number of nonzeros in the various rows of the
4275            OFF-DIAGONAL portion of the local submatrix (possibly different for
4276            each row) or NULL, if o_nz is used to specify the nonzero
4277            structure. The size of this array is equal to the number
4278            of local rows, i.e 'm'.
4279 
4280    Output Parameter:
4281 .  A - the matrix
4282 
4283    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4284    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4285    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4286 
4287    Notes:
4288    If the *_nnz parameter is given then the *_nz parameter is ignored
4289 
4290    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4291    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4292    storage requirements for this matrix.
4293 
4294    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4295    processor then it must be used on all processors that share the object for
4296    that argument.
4297 
4298    The user MUST specify either the local or global matrix dimensions
4299    (possibly both).
4300 
4301    The parallel matrix is partitioned across processors such that the
4302    first m0 rows belong to process 0, the next m1 rows belong to
4303    process 1, the next m2 rows belong to process 2 etc.. where
4304    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4305    values corresponding to [m x N] submatrix.
4306 
4307    The columns are logically partitioned with the n0 columns belonging
4308    to 0th partition, the next n1 columns belonging to the next
4309    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4310 
4311    The DIAGONAL portion of the local submatrix on any given processor
4312    is the submatrix corresponding to the rows and columns m,n
4313    corresponding to the given processor. i.e diagonal matrix on
4314    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4315    etc. The remaining portion of the local submatrix [m x (N-n)]
4316    constitute the OFF-DIAGONAL portion. The example below better
4317    illustrates this concept.
4318 
4319    For a square global matrix we define each processor's diagonal portion
4320    to be its local rows and the corresponding columns (a square submatrix);
4321    each processor's off-diagonal portion encompasses the remainder of the
4322    local matrix (a rectangular submatrix).
4323 
4324    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4325 
4326    When calling this routine with a single process communicator, a matrix of
4327    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4328    type of communicator, use the construction mechanism
4329 .vb
4330      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4331 .ve
4332 
4333 $     MatCreate(...,&A);
4334 $     MatSetType(A,MATMPIAIJ);
4335 $     MatSetSizes(A, m,n,M,N);
4336 $     MatMPIAIJSetPreallocation(A,...);
4337 
4338    By default, this format uses inodes (identical nodes) when possible.
4339    We search for consecutive rows with the same nonzero structure, thereby
4340    reusing matrix information to achieve increased efficiency.
4341 
4342    Options Database Keys:
4343 +  -mat_no_inode  - Do not use inodes
4344 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4345 
4346    Example usage:
4347 
4348    Consider the following 8x8 matrix with 34 non-zero values, that is
4349    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4350    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4351    as follows
4352 
4353 .vb
4354             1  2  0  |  0  3  0  |  0  4
4355     Proc0   0  5  6  |  7  0  0  |  8  0
4356             9  0 10  | 11  0  0  | 12  0
4357     -------------------------------------
4358            13  0 14  | 15 16 17  |  0  0
4359     Proc1   0 18  0  | 19 20 21  |  0  0
4360             0  0  0  | 22 23  0  | 24  0
4361     -------------------------------------
4362     Proc2  25 26 27  |  0  0 28  | 29  0
4363            30  0  0  | 31 32 33  |  0 34
4364 .ve
4365 
4366    This can be represented as a collection of submatrices as
4367 
4368 .vb
4369       A B C
4370       D E F
4371       G H I
4372 .ve
4373 
4374    Where the submatrices A,B,C are owned by proc0, D,E,F are
4375    owned by proc1, G,H,I are owned by proc2.
4376 
4377    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4378    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4379    The 'M','N' parameters are 8,8, and have the same values on all procs.
4380 
4381    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4382    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4383    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4384    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4385    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4386    matrix, and [DF] as another SeqAIJ matrix.
4387 
4388    When d_nz, o_nz parameters are specified, d_nz storage elements are
4389    allocated for every row of the local diagonal submatrix, and o_nz
4390    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4391    One way to choose d_nz and o_nz is to use the max nonzeros per local
4392    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4393    In this case, the values of d_nz,o_nz are
4394 .vb
4395      proc0 : d_nz = 2, o_nz = 2
4396      proc1 : d_nz = 3, o_nz = 2
4397      proc2 : d_nz = 1, o_nz = 4
4398 .ve
4399    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4400    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4401    for proc2. i.e we are using 12+15+10=37 storage locations to store
4402    34 values.
4403 
4404    When d_nnz, o_nnz parameters are specified, the storage is specified
4405    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4406    In the above case the values for d_nnz,o_nnz are
4407 .vb
4408      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4409      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4410      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4411 .ve
4412    Here the space allocated is sum of all the above values i.e 34, and
4413    hence pre-allocation is perfect.
4414 
4415    Level: intermediate
4416 
4417 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4418           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4419 @*/
4420 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4421 {
4422   PetscErrorCode ierr;
4423   PetscMPIInt    size;
4424 
4425   PetscFunctionBegin;
4426   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4427   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4428   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4429   if (size > 1) {
4430     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4431     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4432   } else {
4433     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4434     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4435   }
4436   PetscFunctionReturn(0);
4437 }
4438 
4439 /*@C
4440   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4441 
4442   Not collective
4443 
4444   Input Parameter:
4445 . A - The MPIAIJ matrix
4446 
4447   Output Parameters:
4448 + Ad - The local diagonal block as a SeqAIJ matrix
4449 . Ao - The local off-diagonal block as a SeqAIJ matrix
4450 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4451 
4452   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4453   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4454   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4455   local column numbers to global column numbers in the original matrix.
4456 
4457   Level: intermediate
4458 
4459 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4460 @*/
4461 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4462 {
4463   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4464   PetscBool      flg;
4465   PetscErrorCode ierr;
4466 
4467   PetscFunctionBegin;
4468   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4469   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4470   if (Ad)     *Ad     = a->A;
4471   if (Ao)     *Ao     = a->B;
4472   if (colmap) *colmap = a->garray;
4473   PetscFunctionReturn(0);
4474 }
4475 
4476 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4477 {
4478   PetscErrorCode ierr;
4479   PetscInt       m,N,i,rstart,nnz,Ii;
4480   PetscInt       *indx;
4481   PetscScalar    *values;
4482 
4483   PetscFunctionBegin;
4484   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4485   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4486     PetscInt       *dnz,*onz,sum,bs,cbs;
4487 
4488     if (n == PETSC_DECIDE) {
4489       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4490     }
4491     /* Check sum(n) = N */
4492     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4493     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4494 
4495     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4496     rstart -= m;
4497 
4498     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4499     for (i=0; i<m; i++) {
4500       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4501       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4502       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4503     }
4504 
4505     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4506     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4507     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4508     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4509     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4510     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4511     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4512     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4513     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4514   }
4515 
4516   /* numeric phase */
4517   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4518   for (i=0; i<m; i++) {
4519     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4520     Ii   = i + rstart;
4521     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4522     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4523   }
4524   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4525   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4526   PetscFunctionReturn(0);
4527 }
4528 
4529 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4530 {
4531   PetscErrorCode    ierr;
4532   PetscMPIInt       rank;
4533   PetscInt          m,N,i,rstart,nnz;
4534   size_t            len;
4535   const PetscInt    *indx;
4536   PetscViewer       out;
4537   char              *name;
4538   Mat               B;
4539   const PetscScalar *values;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4543   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4544   /* Should this be the type of the diagonal block of A? */
4545   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4546   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4547   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4548   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4549   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4550   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4551   for (i=0; i<m; i++) {
4552     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4553     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4554     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4555   }
4556   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4557   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4558 
4559   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4560   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4561   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4562   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4563   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4564   ierr = PetscFree(name);CHKERRQ(ierr);
4565   ierr = MatView(B,out);CHKERRQ(ierr);
4566   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4567   ierr = MatDestroy(&B);CHKERRQ(ierr);
4568   PetscFunctionReturn(0);
4569 }
4570 
4571 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4572 {
4573   PetscErrorCode      ierr;
4574   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4575 
4576   PetscFunctionBegin;
4577   if (!merge) PetscFunctionReturn(0);
4578   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4582   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4583   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4584   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4585   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4586   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4587   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4588   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4589   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4590   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4591   ierr = PetscFree(merge);CHKERRQ(ierr);
4592   PetscFunctionReturn(0);
4593 }
4594 
4595 #include <../src/mat/utils/freespace.h>
4596 #include <petscbt.h>
4597 
4598 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4599 {
4600   PetscErrorCode      ierr;
4601   MPI_Comm            comm;
4602   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4603   PetscMPIInt         size,rank,taga,*len_s;
4604   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4605   PetscInt            proc,m;
4606   PetscInt            **buf_ri,**buf_rj;
4607   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4608   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4609   MPI_Request         *s_waits,*r_waits;
4610   MPI_Status          *status;
4611   MatScalar           *aa=a->a;
4612   MatScalar           **abuf_r,*ba_i;
4613   Mat_Merge_SeqsToMPI *merge;
4614   PetscContainer      container;
4615 
4616   PetscFunctionBegin;
4617   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4618   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4619 
4620   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4621   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4622 
4623   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4624   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4625   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4626 
4627   bi     = merge->bi;
4628   bj     = merge->bj;
4629   buf_ri = merge->buf_ri;
4630   buf_rj = merge->buf_rj;
4631 
4632   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4633   owners = merge->rowmap->range;
4634   len_s  = merge->len_s;
4635 
4636   /* send and recv matrix values */
4637   /*-----------------------------*/
4638   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4639   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4640 
4641   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4642   for (proc=0,k=0; proc<size; proc++) {
4643     if (!len_s[proc]) continue;
4644     i    = owners[proc];
4645     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4646     k++;
4647   }
4648 
4649   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4650   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4651   ierr = PetscFree(status);CHKERRQ(ierr);
4652 
4653   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4654   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4655 
4656   /* insert mat values of mpimat */
4657   /*----------------------------*/
4658   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4659   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4660 
4661   for (k=0; k<merge->nrecv; k++) {
4662     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4663     nrows       = *(buf_ri_k[k]);
4664     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4665     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4666   }
4667 
4668   /* set values of ba */
4669   m = merge->rowmap->n;
4670   for (i=0; i<m; i++) {
4671     arow = owners[rank] + i;
4672     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4673     bnzi = bi[i+1] - bi[i];
4674     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4675 
4676     /* add local non-zero vals of this proc's seqmat into ba */
4677     anzi   = ai[arow+1] - ai[arow];
4678     aj     = a->j + ai[arow];
4679     aa     = a->a + ai[arow];
4680     nextaj = 0;
4681     for (j=0; nextaj<anzi; j++) {
4682       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4683         ba_i[j] += aa[nextaj++];
4684       }
4685     }
4686 
4687     /* add received vals into ba */
4688     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4689       /* i-th row */
4690       if (i == *nextrow[k]) {
4691         anzi   = *(nextai[k]+1) - *nextai[k];
4692         aj     = buf_rj[k] + *(nextai[k]);
4693         aa     = abuf_r[k] + *(nextai[k]);
4694         nextaj = 0;
4695         for (j=0; nextaj<anzi; j++) {
4696           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4697             ba_i[j] += aa[nextaj++];
4698           }
4699         }
4700         nextrow[k]++; nextai[k]++;
4701       }
4702     }
4703     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4704   }
4705   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4706   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4707 
4708   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4709   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4710   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4711   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4712   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4713   PetscFunctionReturn(0);
4714 }
4715 
4716 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4717 {
4718   PetscErrorCode      ierr;
4719   Mat                 B_mpi;
4720   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4721   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4722   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4723   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4724   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4725   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4726   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4727   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4728   MPI_Status          *status;
4729   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4730   PetscBT             lnkbt;
4731   Mat_Merge_SeqsToMPI *merge;
4732   PetscContainer      container;
4733 
4734   PetscFunctionBegin;
4735   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4736 
4737   /* make sure it is a PETSc comm */
4738   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4739   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4740   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4741 
4742   ierr = PetscNew(&merge);CHKERRQ(ierr);
4743   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4744 
4745   /* determine row ownership */
4746   /*---------------------------------------------------------*/
4747   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4748   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4749   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4750   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4751   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4752   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4753   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4754 
4755   m      = merge->rowmap->n;
4756   owners = merge->rowmap->range;
4757 
4758   /* determine the number of messages to send, their lengths */
4759   /*---------------------------------------------------------*/
4760   len_s = merge->len_s;
4761 
4762   len          = 0; /* length of buf_si[] */
4763   merge->nsend = 0;
4764   for (proc=0; proc<size; proc++) {
4765     len_si[proc] = 0;
4766     if (proc == rank) {
4767       len_s[proc] = 0;
4768     } else {
4769       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4770       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4771     }
4772     if (len_s[proc]) {
4773       merge->nsend++;
4774       nrows = 0;
4775       for (i=owners[proc]; i<owners[proc+1]; i++) {
4776         if (ai[i+1] > ai[i]) nrows++;
4777       }
4778       len_si[proc] = 2*(nrows+1);
4779       len         += len_si[proc];
4780     }
4781   }
4782 
4783   /* determine the number and length of messages to receive for ij-structure */
4784   /*-------------------------------------------------------------------------*/
4785   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4786   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4787 
4788   /* post the Irecv of j-structure */
4789   /*-------------------------------*/
4790   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4791   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4792 
4793   /* post the Isend of j-structure */
4794   /*--------------------------------*/
4795   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4796 
4797   for (proc=0, k=0; proc<size; proc++) {
4798     if (!len_s[proc]) continue;
4799     i    = owners[proc];
4800     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4801     k++;
4802   }
4803 
4804   /* receives and sends of j-structure are complete */
4805   /*------------------------------------------------*/
4806   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4807   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4808 
4809   /* send and recv i-structure */
4810   /*---------------------------*/
4811   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4812   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4813 
4814   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4815   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4816   for (proc=0,k=0; proc<size; proc++) {
4817     if (!len_s[proc]) continue;
4818     /* form outgoing message for i-structure:
4819          buf_si[0]:                 nrows to be sent
4820                [1:nrows]:           row index (global)
4821                [nrows+1:2*nrows+1]: i-structure index
4822     */
4823     /*-------------------------------------------*/
4824     nrows       = len_si[proc]/2 - 1;
4825     buf_si_i    = buf_si + nrows+1;
4826     buf_si[0]   = nrows;
4827     buf_si_i[0] = 0;
4828     nrows       = 0;
4829     for (i=owners[proc]; i<owners[proc+1]; i++) {
4830       anzi = ai[i+1] - ai[i];
4831       if (anzi) {
4832         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4833         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4834         nrows++;
4835       }
4836     }
4837     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4838     k++;
4839     buf_si += len_si[proc];
4840   }
4841 
4842   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4843   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4844 
4845   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4846   for (i=0; i<merge->nrecv; i++) {
4847     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4848   }
4849 
4850   ierr = PetscFree(len_si);CHKERRQ(ierr);
4851   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4852   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4853   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4854   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4855   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4856   ierr = PetscFree(status);CHKERRQ(ierr);
4857 
4858   /* compute a local seq matrix in each processor */
4859   /*----------------------------------------------*/
4860   /* allocate bi array and free space for accumulating nonzero column info */
4861   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4862   bi[0] = 0;
4863 
4864   /* create and initialize a linked list */
4865   nlnk = N+1;
4866   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4867 
4868   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4869   len  = ai[owners[rank+1]] - ai[owners[rank]];
4870   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4871 
4872   current_space = free_space;
4873 
4874   /* determine symbolic info for each local row */
4875   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4876 
4877   for (k=0; k<merge->nrecv; k++) {
4878     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4879     nrows       = *buf_ri_k[k];
4880     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4881     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4882   }
4883 
4884   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4885   len  = 0;
4886   for (i=0; i<m; i++) {
4887     bnzi = 0;
4888     /* add local non-zero cols of this proc's seqmat into lnk */
4889     arow  = owners[rank] + i;
4890     anzi  = ai[arow+1] - ai[arow];
4891     aj    = a->j + ai[arow];
4892     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4893     bnzi += nlnk;
4894     /* add received col data into lnk */
4895     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4896       if (i == *nextrow[k]) { /* i-th row */
4897         anzi  = *(nextai[k]+1) - *nextai[k];
4898         aj    = buf_rj[k] + *nextai[k];
4899         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4900         bnzi += nlnk;
4901         nextrow[k]++; nextai[k]++;
4902       }
4903     }
4904     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4905 
4906     /* if free space is not available, make more free space */
4907     if (current_space->local_remaining<bnzi) {
4908       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4909       nspacedouble++;
4910     }
4911     /* copy data into free space, then initialize lnk */
4912     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4913     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4914 
4915     current_space->array           += bnzi;
4916     current_space->local_used      += bnzi;
4917     current_space->local_remaining -= bnzi;
4918 
4919     bi[i+1] = bi[i] + bnzi;
4920   }
4921 
4922   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4923 
4924   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4925   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4926   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4927 
4928   /* create symbolic parallel matrix B_mpi */
4929   /*---------------------------------------*/
4930   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4931   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4932   if (n==PETSC_DECIDE) {
4933     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4934   } else {
4935     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4936   }
4937   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4938   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4939   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4940   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4941   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4942 
4943   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4944   B_mpi->assembled  = PETSC_FALSE;
4945   merge->bi         = bi;
4946   merge->bj         = bj;
4947   merge->buf_ri     = buf_ri;
4948   merge->buf_rj     = buf_rj;
4949   merge->coi        = NULL;
4950   merge->coj        = NULL;
4951   merge->owners_co  = NULL;
4952 
4953   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4954 
4955   /* attach the supporting struct to B_mpi for reuse */
4956   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4957   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4958   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4959   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4960   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4961   *mpimat = B_mpi;
4962 
4963   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4964   PetscFunctionReturn(0);
4965 }
4966 
4967 /*@C
4968       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4969                  matrices from each processor
4970 
4971     Collective
4972 
4973    Input Parameters:
4974 +    comm - the communicators the parallel matrix will live on
4975 .    seqmat - the input sequential matrices
4976 .    m - number of local rows (or PETSC_DECIDE)
4977 .    n - number of local columns (or PETSC_DECIDE)
4978 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4979 
4980    Output Parameter:
4981 .    mpimat - the parallel matrix generated
4982 
4983     Level: advanced
4984 
4985    Notes:
4986      The dimensions of the sequential matrix in each processor MUST be the same.
4987      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4988      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4989 @*/
4990 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4991 {
4992   PetscErrorCode ierr;
4993   PetscMPIInt    size;
4994 
4995   PetscFunctionBegin;
4996   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4997   if (size == 1) {
4998     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4999     if (scall == MAT_INITIAL_MATRIX) {
5000       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5001     } else {
5002       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5003     }
5004     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005     PetscFunctionReturn(0);
5006   }
5007   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5008   if (scall == MAT_INITIAL_MATRIX) {
5009     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5010   }
5011   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5012   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5013   PetscFunctionReturn(0);
5014 }
5015 
5016 /*@
5017      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5018           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5019           with MatGetSize()
5020 
5021     Not Collective
5022 
5023    Input Parameters:
5024 +    A - the matrix
5025 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5026 
5027    Output Parameter:
5028 .    A_loc - the local sequential matrix generated
5029 
5030     Level: developer
5031 
5032    Notes:
5033      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5034      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5035      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5036      modify the values of the returned A_loc.
5037 
5038 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5039 @*/
5040 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5041 {
5042   PetscErrorCode    ierr;
5043   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5044   Mat_SeqAIJ        *mat,*a,*b;
5045   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5046   const PetscScalar *aa,*ba,*aav,*bav;
5047   PetscScalar       *ca,*cam;
5048   PetscMPIInt       size;
5049   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5050   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5051   PetscBool         match;
5052 
5053   PetscFunctionBegin;
5054   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5055   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5056   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5057   if (size == 1) {
5058     if (scall == MAT_INITIAL_MATRIX) {
5059       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5060       *A_loc = mpimat->A;
5061     } else if (scall == MAT_REUSE_MATRIX) {
5062       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5063     }
5064     PetscFunctionReturn(0);
5065   }
5066 
5067   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5068   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5069   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5070   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5071   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5072   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5073   aa   = aav;
5074   ba   = bav;
5075   if (scall == MAT_INITIAL_MATRIX) {
5076     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5077     ci[0] = 0;
5078     for (i=0; i<am; i++) {
5079       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5080     }
5081     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5082     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5083     k    = 0;
5084     for (i=0; i<am; i++) {
5085       ncols_o = bi[i+1] - bi[i];
5086       ncols_d = ai[i+1] - ai[i];
5087       /* off-diagonal portion of A */
5088       for (jo=0; jo<ncols_o; jo++) {
5089         col = cmap[*bj];
5090         if (col >= cstart) break;
5091         cj[k]   = col; bj++;
5092         ca[k++] = *ba++;
5093       }
5094       /* diagonal portion of A */
5095       for (j=0; j<ncols_d; j++) {
5096         cj[k]   = cstart + *aj++;
5097         ca[k++] = *aa++;
5098       }
5099       /* off-diagonal portion of A */
5100       for (j=jo; j<ncols_o; j++) {
5101         cj[k]   = cmap[*bj++];
5102         ca[k++] = *ba++;
5103       }
5104     }
5105     /* put together the new matrix */
5106     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5107     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5108     /* Since these are PETSc arrays, change flags to free them as necessary. */
5109     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5110     mat->free_a  = PETSC_TRUE;
5111     mat->free_ij = PETSC_TRUE;
5112     mat->nonew   = 0;
5113   } else if (scall == MAT_REUSE_MATRIX) {
5114     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5115 #if defined(PETSC_USE_DEVICE)
5116     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5117 #endif
5118     ci = mat->i; cj = mat->j; cam = mat->a;
5119     for (i=0; i<am; i++) {
5120       /* off-diagonal portion of A */
5121       ncols_o = bi[i+1] - bi[i];
5122       for (jo=0; jo<ncols_o; jo++) {
5123         col = cmap[*bj];
5124         if (col >= cstart) break;
5125         *cam++ = *ba++; bj++;
5126       }
5127       /* diagonal portion of A */
5128       ncols_d = ai[i+1] - ai[i];
5129       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5130       /* off-diagonal portion of A */
5131       for (j=jo; j<ncols_o; j++) {
5132         *cam++ = *ba++; bj++;
5133       }
5134     }
5135   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5136   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5137   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5138   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5139   PetscFunctionReturn(0);
5140 }
5141 
5142 /*@
5143      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5144           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5145 
5146     Not Collective
5147 
5148    Input Parameters:
5149 +    A - the matrix
5150 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5151 
5152    Output Parameter:
5153 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5154 -    A_loc - the local sequential matrix generated
5155 
5156     Level: developer
5157 
5158    Notes:
5159      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5160 
5161 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5162 
5163 @*/
5164 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5165 {
5166   PetscErrorCode ierr;
5167   Mat            Ao,Ad;
5168   const PetscInt *cmap;
5169   PetscMPIInt    size;
5170   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5171 
5172   PetscFunctionBegin;
5173   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5174   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5175   if (size == 1) {
5176     if (scall == MAT_INITIAL_MATRIX) {
5177       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5178       *A_loc = Ad;
5179     } else if (scall == MAT_REUSE_MATRIX) {
5180       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5181     }
5182     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5183     PetscFunctionReturn(0);
5184   }
5185   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5186   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5187   if (f) {
5188     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5189   } else {
5190     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5191     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5192     Mat_SeqAIJ        *c;
5193     PetscInt          *ai = a->i, *aj = a->j;
5194     PetscInt          *bi = b->i, *bj = b->j;
5195     PetscInt          *ci,*cj;
5196     const PetscScalar *aa,*ba;
5197     PetscScalar       *ca;
5198     PetscInt          i,j,am,dn,on;
5199 
5200     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5201     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5202     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5203     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5204     if (scall == MAT_INITIAL_MATRIX) {
5205       PetscInt k;
5206       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5207       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5208       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5209       ci[0] = 0;
5210       for (i=0,k=0; i<am; i++) {
5211         const PetscInt ncols_o = bi[i+1] - bi[i];
5212         const PetscInt ncols_d = ai[i+1] - ai[i];
5213         ci[i+1] = ci[i] + ncols_o + ncols_d;
5214         /* diagonal portion of A */
5215         for (j=0; j<ncols_d; j++,k++) {
5216           cj[k] = *aj++;
5217           ca[k] = *aa++;
5218         }
5219         /* off-diagonal portion of A */
5220         for (j=0; j<ncols_o; j++,k++) {
5221           cj[k] = dn + *bj++;
5222           ca[k] = *ba++;
5223         }
5224       }
5225       /* put together the new matrix */
5226       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5227       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5228       /* Since these are PETSc arrays, change flags to free them as necessary. */
5229       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5230       c->free_a  = PETSC_TRUE;
5231       c->free_ij = PETSC_TRUE;
5232       c->nonew   = 0;
5233       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5234     } else if (scall == MAT_REUSE_MATRIX) {
5235 #if defined(PETSC_HAVE_DEVICE)
5236       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5237 #endif
5238       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5239       ca = c->a;
5240       for (i=0; i<am; i++) {
5241         const PetscInt ncols_d = ai[i+1] - ai[i];
5242         const PetscInt ncols_o = bi[i+1] - bi[i];
5243         /* diagonal portion of A */
5244         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5245         /* off-diagonal portion of A */
5246         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5247       }
5248     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5249     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5250     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5251     if (glob) {
5252       PetscInt cst, *gidx;
5253 
5254       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5255       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5256       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5257       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5258       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5259     }
5260   }
5261   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5262   PetscFunctionReturn(0);
5263 }
5264 
5265 /*@C
5266      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5267 
5268     Not Collective
5269 
5270    Input Parameters:
5271 +    A - the matrix
5272 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5273 -    row, col - index sets of rows and columns to extract (or NULL)
5274 
5275    Output Parameter:
5276 .    A_loc - the local sequential matrix generated
5277 
5278     Level: developer
5279 
5280 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5281 
5282 @*/
5283 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5284 {
5285   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5286   PetscErrorCode ierr;
5287   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5288   IS             isrowa,iscola;
5289   Mat            *aloc;
5290   PetscBool      match;
5291 
5292   PetscFunctionBegin;
5293   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5294   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5295   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5296   if (!row) {
5297     start = A->rmap->rstart; end = A->rmap->rend;
5298     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5299   } else {
5300     isrowa = *row;
5301   }
5302   if (!col) {
5303     start = A->cmap->rstart;
5304     cmap  = a->garray;
5305     nzA   = a->A->cmap->n;
5306     nzB   = a->B->cmap->n;
5307     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5308     ncols = 0;
5309     for (i=0; i<nzB; i++) {
5310       if (cmap[i] < start) idx[ncols++] = cmap[i];
5311       else break;
5312     }
5313     imark = i;
5314     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5315     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5316     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5317   } else {
5318     iscola = *col;
5319   }
5320   if (scall != MAT_INITIAL_MATRIX) {
5321     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5322     aloc[0] = *A_loc;
5323   }
5324   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5325   if (!col) { /* attach global id of condensed columns */
5326     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5327   }
5328   *A_loc = aloc[0];
5329   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5330   if (!row) {
5331     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5332   }
5333   if (!col) {
5334     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5335   }
5336   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5337   PetscFunctionReturn(0);
5338 }
5339 
5340 /*
 * MatCreateSeqSubMatrixWithRows_Private - Create a sequential AIJ matrix from a list of global row
 * indices of the parallel matrix P. For every requested row, all stored entries of that row (from both
 * the diagonal and the off-diagonal block of the owning process) are gathered, with global column indices.
 * A row may be local or remote. The routine is designed to be scalable in memory so that nothing is
 * based on a global size.
 *
 * Input:
 *   P     - the parallel matrix (accessed as Mat_MPIAIJ)
 *   rows  - global indices of the rows to gather
 * Output:
 *   P_oth - the new sequential matrix, one row per entry of rows; the two star forests used to fill it
 *           are composed with it under the names "diagsf" and "offdiagsf"
 */
5345 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5346 {
5347   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5348   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5349   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5350   PetscMPIInt              owner;
5351   PetscSFNode              *iremote,*oiremote;
5352   const PetscInt           *lrowindices;
5353   PetscErrorCode           ierr;
5354   PetscSF                  sf,osf;
5355   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5356   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5357   MPI_Comm                 comm;
5358   ISLocalToGlobalMapping   mapping;
5359 
5360   PetscFunctionBegin;
5361   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5362   /* plocalsize is the number of roots
5363    * nrows is the number of leaves
5364    * */
5365   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5366   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5367   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5368   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5369   for (i=0;i<nrows;i++) {
5370     /* Find a remote index and an owner for a row
5371      * The row could be local or remote
5372      * */
5373     owner = 0;
5374     lidx  = 0;
5375     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5376     iremote[i].index = lidx;
5377     iremote[i].rank  = owner;
5378   }
5379   /* Create SF to communicate how many nonzero columns for each row */
5380   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5381   /* SF will figure out the number of nonzero columns for each row, and their
5382    * offsets
5383    * */
  /* iremote is handed over with PETSC_OWN_POINTER; the variable is reused for a new allocation further down */
5384   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5385   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5386   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5387 
  /* Root data, stored as (diag,offdiag) pairs per local row of P:
   *   nrcols   - number of entries of the row in each block
   *   roffsets - position of the first entry of the row inside each block's arrays
   * pnnz will hold the total length of every requested row */
5388   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5389   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5390   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5391   roffsets[0] = 0;
5392   roffsets[1] = 0;
5393   for (i=0;i<plocalsize;i++) {
5394     /* diag */
5395     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5396     /* off diag */
5397     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5398     /* compute offsets so that we relative location for each row */
5399     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5400     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5401   }
5402   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5403   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5404   /* 'r' means root, and 'l' means leaf */
  /* each broadcast unit is one (diag,offdiag) pair, hence MPIU_2INT; the two broadcasts are overlapped */
5405   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5406   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5407   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5408   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5409   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5410   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5411   ierr = PetscFree(nrcols);CHKERRQ(ierr);
  /* Totals over all requested rows; ncol tracks the longest requested row */
5412   dntotalcols = 0;
5413   ontotalcols = 0;
5414   ncol = 0;
5415   for (i=0;i<nrows;i++) {
5416     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5417     ncol = PetscMax(pnnz[i],ncol);
5418     /* diag */
5419     dntotalcols += nlcols[i*2+0];
5420     /* off diag */
5421     ontotalcols += nlcols[i*2+1];
5422   }
5423   /* We do not need to figure the right number of columns
5424    * since all the calculations will be done by going through the raw data
5425    * */
  /* P_oth is created with ncol (the longest row length) columns and exact per-row preallocation pnnz */
5426   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5427   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5428   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5429   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5430   /* diag */
5431   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5432   /* off diag */
5433   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5434   /* diag */
5435   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5436   /* off diag */
5437   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  /* The counters are reset and recomputed below while the per-entry graphs are built.
   * ntotalcols is the running position inside P_oth's arrays: within each row the diag entries
   * are placed first, followed by the offdiag entries */
5438   dntotalcols = 0;
5439   ontotalcols = 0;
5440   ntotalcols  = 0;
5441   for (i=0;i<nrows;i++) {
5442     owner = 0;
5443     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5444     /* Set iremote for diag matrix */
5445     for (j=0;j<nlcols[i*2+0];j++) {
5446       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5447       iremote[dntotalcols].rank    = owner;
5448       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5449       ilocal[dntotalcols++]        = ntotalcols++;
5450     }
5451     /* off diag */
5452     for (j=0;j<nlcols[i*2+1];j++) {
5453       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5454       oiremote[ontotalcols].rank    = owner;
5455       oilocal[ontotalcols++]        = ntotalcols++;
5456     }
5457   }
5458   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5459   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5460   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5461   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5462   /* P serves as roots and P_oth is leaves
5463    * Diag matrix
5464    * */
  /* roots: the pd->i[plocalsize] stored entries of the local diag block; leaves: positions ilocal in P_oth */
5465   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5466   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5467   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5468 
5469   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5470   /* Off diag */
  /* roots: the po->i[plocalsize] stored entries of the local offdiag block; leaves: positions oilocal in P_oth */
5471   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5472   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5473   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5474   /* We operate on the matrix internal data for saving memory */
  /* Both star forests write into the same p_oth arrays, at the leaf positions given by ilocal and oilocal.
   * The value broadcasts started here are completed only after the index exchange below. */
5475   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5476   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5477   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5478   /* Convert to global indices for diag matrix */
  /* pd->j is modified in place; it is shifted back right after the matching PetscSFBcastEnd() */
5479   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5480   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5481   /* We want P_oth store global indices */
5482   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5483   /* Use memory scalable approach */
5484   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  /* po->j is translated in place as well (input and output arrays are the same) */
5485   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5486   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5487   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5488   /* Convert back to local indices */
5489   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5490   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Undo the in-place translation of po->j; with IS_GTOLM_DROP unmatched indices would be dropped,
   * so nout must equal the number of offdiag entries */
5491   nout = 0;
5492   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5493   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5494   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5495   /* Exchange values */
5496   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5497   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5498   /* Stop PETSc from shrinking memory */
  /* the arrays were filled directly, so every row is declared full (ilen = imax) before assembly */
5499   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5500   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5501   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5502   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5503   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5504   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  /* release the local handles to sf and osf after composing them with P_oth */
5505   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5506   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5507   PetscFunctionReturn(0);
5508 }
5509 
5510 /*
5511  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5512  * This supports MPIAIJ and MAIJ
5513  * */
5514 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5515 {
5516   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5517   Mat_SeqAIJ            *p_oth;
5518   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5519   IS                    rows,map;
5520   PetscHMapI            hamp;
5521   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5522   MPI_Comm              comm;
5523   PetscSF               sf,osf;
5524   PetscBool             has;
5525   PetscErrorCode        ierr;
5526 
5527   PetscFunctionBegin;
5528   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5529   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5530   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5531    *  and then create a submatrix (that often is an overlapping matrix)
5532    * */
5533   if (reuse == MAT_INITIAL_MATRIX) {
5534     /* Use a hash table to figure out unique keys */
5535     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5536     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5537     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5538     count = 0;
5539     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5540     for (i=0;i<a->B->cmap->n;i++) {
5541       key  = a->garray[i]/dof;
5542       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5543       if (!has) {
5544         mapping[i] = count;
5545         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5546       } else {
5547         /* Current 'i' has the same value the previous step */
5548         mapping[i] = count-1;
5549       }
5550     }
5551     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5552     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5553     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5554     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5555     off = 0;
5556     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5557     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5558     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5559     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5560     /* In case, the matrix was already created but users want to recreate the matrix */
5561     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5562     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5563     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5564     ierr = ISDestroy(&map);CHKERRQ(ierr);
5565     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5566   } else if (reuse == MAT_REUSE_MATRIX) {
5567     /* If matrix was already created, we simply update values using SF objects
5568      * that as attached to the matrix ealier.
5569      *  */
5570     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5571     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5572     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5573     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5574     /* Update values in place */
5575     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5576     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5577     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5578     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5579   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5580   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /*@C
5585     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5586 
5587     Collective on Mat
5588 
5589    Input Parameters:
5590 +    A,B - the matrices in mpiaij format
5591 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5592 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5593 
5594    Output Parameter:
5595 +    rowb, colb - index sets of rows and columns of B to extract
5596 -    B_seq - the sequential matrix generated
5597 
5598     Level: developer
5599 
5600 @*/
5601 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5602 {
5603   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5604   PetscErrorCode ierr;
5605   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5606   IS             isrowb,iscolb;
5607   Mat            *bseq=NULL;
5608 
5609   PetscFunctionBegin;
5610   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5611     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5612   }
5613   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5614 
5615   if (scall == MAT_INITIAL_MATRIX) {
5616     start = A->cmap->rstart;
5617     cmap  = a->garray;
5618     nzA   = a->A->cmap->n;
5619     nzB   = a->B->cmap->n;
5620     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5621     ncols = 0;
5622     for (i=0; i<nzB; i++) {  /* row < local row index */
5623       if (cmap[i] < start) idx[ncols++] = cmap[i];
5624       else break;
5625     }
5626     imark = i;
5627     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5628     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5629     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5630     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5631   } else {
5632     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5633     isrowb  = *rowb; iscolb = *colb;
5634     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5635     bseq[0] = *B_seq;
5636   }
5637   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5638   *B_seq = bseq[0];
5639   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5640   if (!rowb) {
5641     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5642   } else {
5643     *rowb = isrowb;
5644   }
5645   if (!colb) {
5646     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5647   } else {
5648     *colb = iscolb;
5649   }
5650   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5651   PetscFunctionReturn(0);
5652 }
5653 
5654 /*
5655     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5656     of the OFF-DIAGONAL portion of local A
5657 
5658     Collective on Mat
5659 
5660    Input Parameters:
5661 +    A,B - the matrices in mpiaij format
5662 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5663 
5664    Output Parameter:
5665 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5666 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5667 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5668 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5669 
5670     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5671      for this matrix. This is not desirable..
5672 
5673     Level: developer
5674 
5675 */
5676 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5677 {
5678   PetscErrorCode         ierr;
5679   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5680   Mat_SeqAIJ             *b_oth;
5681   VecScatter             ctx;
5682   MPI_Comm               comm;
5683   const PetscMPIInt      *rprocs,*sprocs;
5684   const PetscInt         *srow,*rstarts,*sstarts;
5685   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5686   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5687   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5688   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5689   PetscMPIInt            size,tag,rank,nreqs;
5690 
5691   PetscFunctionBegin;
5692   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5693   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5694 
5695   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5696     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5697   }
5698   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5699   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5700 
5701   if (size == 1) {
5702     startsj_s = NULL;
5703     bufa_ptr  = NULL;
5704     *B_oth    = NULL;
5705     PetscFunctionReturn(0);
5706   }
5707 
5708   ctx = a->Mvctx;
5709   tag = ((PetscObject)ctx)->tag;
5710 
5711   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5712   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5713   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5714   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5715   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5716   rwaits = reqs;
5717   swaits = reqs + nrecvs;
5718 
5719   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5720   if (scall == MAT_INITIAL_MATRIX) {
5721     /* i-array */
5722     /*---------*/
5723     /*  post receives */
5724     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5725     for (i=0; i<nrecvs; i++) {
5726       rowlen = rvalues + rstarts[i]*rbs;
5727       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5728       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5729     }
5730 
5731     /* pack the outgoing message */
5732     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5733 
5734     sstartsj[0] = 0;
5735     rstartsj[0] = 0;
5736     len         = 0; /* total length of j or a array to be sent */
5737     if (nsends) {
5738       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5739       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5740     }
5741     for (i=0; i<nsends; i++) {
5742       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5743       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5744       for (j=0; j<nrows; j++) {
5745         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5746         for (l=0; l<sbs; l++) {
5747           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5748 
5749           rowlen[j*sbs+l] = ncols;
5750 
5751           len += ncols;
5752           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5753         }
5754         k++;
5755       }
5756       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5757 
5758       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5759     }
5760     /* recvs and sends of i-array are completed */
5761     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5762     ierr = PetscFree(svalues);CHKERRQ(ierr);
5763 
5764     /* allocate buffers for sending j and a arrays */
5765     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5766     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5767 
5768     /* create i-array of B_oth */
5769     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5770 
5771     b_othi[0] = 0;
5772     len       = 0; /* total length of j or a array to be received */
5773     k         = 0;
5774     for (i=0; i<nrecvs; i++) {
5775       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5776       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5777       for (j=0; j<nrows; j++) {
5778         b_othi[k+1] = b_othi[k] + rowlen[j];
5779         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5780         k++;
5781       }
5782       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5783     }
5784     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5785 
5786     /* allocate space for j and a arrrays of B_oth */
5787     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5788     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5789 
5790     /* j-array */
5791     /*---------*/
5792     /*  post receives of j-array */
5793     for (i=0; i<nrecvs; i++) {
5794       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5795       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5796     }
5797 
5798     /* pack the outgoing message j-array */
5799     if (nsends) k = sstarts[0];
5800     for (i=0; i<nsends; i++) {
5801       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5802       bufJ  = bufj+sstartsj[i];
5803       for (j=0; j<nrows; j++) {
5804         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5805         for (ll=0; ll<sbs; ll++) {
5806           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5807           for (l=0; l<ncols; l++) {
5808             *bufJ++ = cols[l];
5809           }
5810           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5811         }
5812       }
5813       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5814     }
5815 
5816     /* recvs and sends of j-array are completed */
5817     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5818   } else if (scall == MAT_REUSE_MATRIX) {
5819     sstartsj = *startsj_s;
5820     rstartsj = *startsj_r;
5821     bufa     = *bufa_ptr;
5822     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5823     b_otha   = b_oth->a;
5824 #if defined(PETSC_HAVE_DEVICE)
5825     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5826 #endif
5827   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5828 
5829   /* a-array */
5830   /*---------*/
5831   /*  post receives of a-array */
5832   for (i=0; i<nrecvs; i++) {
5833     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5834     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5835   }
5836 
5837   /* pack the outgoing message a-array */
5838   if (nsends) k = sstarts[0];
5839   for (i=0; i<nsends; i++) {
5840     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5841     bufA  = bufa+sstartsj[i];
5842     for (j=0; j<nrows; j++) {
5843       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5844       for (ll=0; ll<sbs; ll++) {
5845         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5846         for (l=0; l<ncols; l++) {
5847           *bufA++ = vals[l];
5848         }
5849         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5850       }
5851     }
5852     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5853   }
5854   /* recvs and sends of a-array are completed */
5855   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5856   ierr = PetscFree(reqs);CHKERRQ(ierr);
5857 
5858   if (scall == MAT_INITIAL_MATRIX) {
5859     /* put together the new matrix */
5860     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5861 
5862     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5863     /* Since these are PETSc arrays, change flags to free them as necessary. */
5864     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5865     b_oth->free_a  = PETSC_TRUE;
5866     b_oth->free_ij = PETSC_TRUE;
5867     b_oth->nonew   = 0;
5868 
5869     ierr = PetscFree(bufj);CHKERRQ(ierr);
5870     if (!startsj_s || !bufa_ptr) {
5871       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5872       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5873     } else {
5874       *startsj_s = sstartsj;
5875       *startsj_r = rstartsj;
5876       *bufa_ptr  = bufa;
5877     }
5878   }
5879 
5880   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5881   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5882   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5883   PetscFunctionReturn(0);
5884 }
5885 
5886 /*@C
5887   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5888 
5889   Not Collective
5890 
5891   Input Parameters:
5892 . A - The matrix in mpiaij format
5893 
5894   Output Parameter:
5895 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5896 . colmap - A map from global column index to local index into lvec
5897 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5898 
5899   Level: developer
5900 
5901 @*/
5902 #if defined(PETSC_USE_CTABLE)
5903 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5904 #else
5905 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5906 #endif
5907 {
5908   Mat_MPIAIJ *a;
5909 
5910   PetscFunctionBegin;
5911   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5912   PetscValidPointer(lvec, 2);
5913   PetscValidPointer(colmap, 3);
5914   PetscValidPointer(multScatter, 4);
5915   a = (Mat_MPIAIJ*) A->data;
5916   if (lvec) *lvec = a->lvec;
5917   if (colmap) *colmap = a->colmap;
5918   if (multScatter) *multScatter = a->Mvctx;
5919   PetscFunctionReturn(0);
5920 }
5921 
5922 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5925 #if defined(PETSC_HAVE_MKL_SPARSE)
5926 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5927 #endif
5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5930 #if defined(PETSC_HAVE_ELEMENTAL)
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5932 #endif
5933 #if defined(PETSC_HAVE_SCALAPACK)
5934 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5935 #endif
5936 #if defined(PETSC_HAVE_HYPRE)
5937 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5938 #endif
5939 #if defined(PETSC_HAVE_CUDA)
5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5941 #endif
5942 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5944 #endif
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5946 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5947 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5948 
5949 /*
5950     Computes (B'*A')' since computing B*A directly is untenable
5951 
5952                n                       p                          p
5953         [             ]       [             ]         [                 ]
5954       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5955         [             ]       [             ]         [                 ]
5956 
5957 */
5958 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5959 {
5960   PetscErrorCode ierr;
5961   Mat            At,Bt,Ct;
5962 
5963   PetscFunctionBegin;
5964   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5965   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5966   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5967   ierr = MatDestroy(&At);CHKERRQ(ierr);
5968   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5969   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5970   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5971   PetscFunctionReturn(0);
5972 }
5973 
5974 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5975 {
5976   PetscErrorCode ierr;
5977   PetscBool      cisdense;
5978 
5979   PetscFunctionBegin;
5980   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5981   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5982   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5983   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5984   if (!cisdense) {
5985     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5986   }
5987   ierr = MatSetUp(C);CHKERRQ(ierr);
5988 
5989   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5990   PetscFunctionReturn(0);
5991 }
5992 
5993 /* ----------------------------------------------------------------*/
5994 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5995 {
5996   Mat_Product *product = C->product;
5997   Mat         A = product->A,B=product->B;
5998 
5999   PetscFunctionBegin;
6000   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6001     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6002 
6003   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6004   C->ops->productsymbolic = MatProductSymbolic_AB;
6005   PetscFunctionReturn(0);
6006 }
6007 
6008 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6009 {
6010   PetscErrorCode ierr;
6011   Mat_Product    *product = C->product;
6012 
6013   PetscFunctionBegin;
6014   if (product->type == MATPRODUCT_AB) {
6015     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6016   }
6017   PetscFunctionReturn(0);
6018 }
6019 /* ----------------------------------------------------------------*/
6020 
6021 /*MC
6022    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6023 
6024    Options Database Keys:
6025 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6026 
6027    Level: beginner
6028 
6029    Notes:
6030     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6031     in this case the values associated with the rows and columns one passes in are set to zero
6032     in the matrix
6033 
6034     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
6035     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6036 
6037 .seealso: MatCreateAIJ()
6038 M*/
6039 
6040 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6041 {
6042   Mat_MPIAIJ     *b;
6043   PetscErrorCode ierr;
6044   PetscMPIInt    size;
6045 
6046   PetscFunctionBegin;
6047   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6048 
6049   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6050   B->data       = (void*)b;
6051   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6052   B->assembled  = PETSC_FALSE;
6053   B->insertmode = NOT_SET_VALUES;
6054   b->size       = size;
6055 
6056   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6057 
6058   /* build cache for off array entries formed */
6059   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6060 
6061   b->donotstash  = PETSC_FALSE;
6062   b->colmap      = NULL;
6063   b->garray      = NULL;
6064   b->roworiented = PETSC_TRUE;
6065 
6066   /* stuff used for matrix vector multiply */
6067   b->lvec  = NULL;
6068   b->Mvctx = NULL;
6069 
6070   /* stuff for MatGetRow() */
6071   b->rowindices   = NULL;
6072   b->rowvalues    = NULL;
6073   b->getrowactive = PETSC_FALSE;
6074 
6075   /* flexible pointer used in CUSPARSE classes */
6076   b->spptr = NULL;
6077 
6078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6088 #if defined(PETSC_HAVE_CUDA)
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6090 #endif
6091 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6093 #endif
6094 #if defined(PETSC_HAVE_MKL_SPARSE)
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6096 #endif
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6101 #if defined(PETSC_HAVE_ELEMENTAL)
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6103 #endif
6104 #if defined(PETSC_HAVE_SCALAPACK)
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6106 #endif
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6109 #if defined(PETSC_HAVE_HYPRE)
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6112 #endif
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6115   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6116   PetscFunctionReturn(0);
6117 }
6118 
6119 /*@C
6120      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6121          and "off-diagonal" part of the matrix in CSR format.
6122 
6123    Collective
6124 
6125    Input Parameters:
6126 +  comm - MPI communicator
6127 .  m - number of local rows (Cannot be PETSC_DECIDE)
6128 .  n - This value should be the same as the local size used in creating the
6129        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6130        calculated if N is given) For square matrices n is almost always m.
6131 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6132 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6133 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6134 .   j - column indices
6135 .   a - matrix values
6136 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6137 .   oj - column indices
6138 -   oa - matrix values
6139 
6140    Output Parameter:
6141 .   mat - the matrix
6142 
6143    Level: advanced
6144 
6145    Notes:
6146        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6147        must free the arrays once the matrix has been destroyed and not before.
6148 
6149        The i and j indices are 0 based
6150 
6151        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6152 
6153        This sets local rows and cannot be used to set off-processor values.
6154 
6155        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6156        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6157        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6158        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6159        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6160        communication if it is known that only local entries will be set.
6161 
6162 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6163           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6164 @*/
6165 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6166 {
6167   PetscErrorCode ierr;
6168   Mat_MPIAIJ     *maij;
6169 
6170   PetscFunctionBegin;
6171   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6172   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6173   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6174   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6175   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6176   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6177   maij = (Mat_MPIAIJ*) (*mat)->data;
6178 
6179   (*mat)->preallocated = PETSC_TRUE;
6180 
6181   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6182   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6183 
6184   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6185   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6186 
6187   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6188   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6189   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6190   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6191 
6192   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6193   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6194   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6195   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6196   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6197   PetscFunctionReturn(0);
6198 }
6199 
6200 /*
6201     Special version for direct calls from Fortran
6202 */
6203 #include <petsc/private/fortranimpl.h>
6204 
6205 /* Change these macros so can be used in void function */
6206 #undef CHKERRQ
6207 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6208 #undef SETERRQ2
6209 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6210 #undef SETERRQ3
6211 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6212 #undef SETERRQ
6213 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6214 
6215 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6216 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6217 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6218 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6219 #else
6220 #endif
6221 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6222 {
6223   Mat            mat  = *mmat;
6224   PetscInt       m    = *mm, n = *mn;
6225   InsertMode     addv = *maddv;
6226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6227   PetscScalar    value;
6228   PetscErrorCode ierr;
6229 
6230   MatCheckPreallocated(mat,1);
6231   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6232   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6233   {
6234     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6235     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6236     PetscBool roworiented = aij->roworiented;
6237 
6238     /* Some Variables required in the macro */
6239     Mat        A                    = aij->A;
6240     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6241     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6242     MatScalar  *aa                  = a->a;
6243     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6244     Mat        B                    = aij->B;
6245     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6246     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6247     MatScalar  *ba                  = b->a;
6248     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6249      * cannot use "#if defined" inside a macro. */
6250     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6251 
6252     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6253     PetscInt  nonew = a->nonew;
6254     MatScalar *ap1,*ap2;
6255 
6256     PetscFunctionBegin;
6257     for (i=0; i<m; i++) {
6258       if (im[i] < 0) continue;
6259       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6260       if (im[i] >= rstart && im[i] < rend) {
6261         row      = im[i] - rstart;
6262         lastcol1 = -1;
6263         rp1      = aj + ai[row];
6264         ap1      = aa + ai[row];
6265         rmax1    = aimax[row];
6266         nrow1    = ailen[row];
6267         low1     = 0;
6268         high1    = nrow1;
6269         lastcol2 = -1;
6270         rp2      = bj + bi[row];
6271         ap2      = ba + bi[row];
6272         rmax2    = bimax[row];
6273         nrow2    = bilen[row];
6274         low2     = 0;
6275         high2    = nrow2;
6276 
6277         for (j=0; j<n; j++) {
6278           if (roworiented) value = v[i*n+j];
6279           else value = v[i+j*m];
6280           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6281           if (in[j] >= cstart && in[j] < cend) {
6282             col = in[j] - cstart;
6283             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6284 #if defined(PETSC_HAVE_DEVICE)
6285             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6286 #endif
6287           } else if (in[j] < 0) continue;
6288           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6289             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6290             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6291           } else {
6292             if (mat->was_assembled) {
6293               if (!aij->colmap) {
6294                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6295               }
6296 #if defined(PETSC_USE_CTABLE)
6297               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6298               col--;
6299 #else
6300               col = aij->colmap[in[j]] - 1;
6301 #endif
6302               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6303                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6304                 col  =  in[j];
6305                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6306                 B        = aij->B;
6307                 b        = (Mat_SeqAIJ*)B->data;
6308                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6309                 rp2      = bj + bi[row];
6310                 ap2      = ba + bi[row];
6311                 rmax2    = bimax[row];
6312                 nrow2    = bilen[row];
6313                 low2     = 0;
6314                 high2    = nrow2;
6315                 bm       = aij->B->rmap->n;
6316                 ba       = b->a;
6317                 inserted = PETSC_FALSE;
6318               }
6319             } else col = in[j];
6320             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6321 #if defined(PETSC_HAVE_DEVICE)
6322             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6323 #endif
6324           }
6325         }
6326       } else if (!aij->donotstash) {
6327         if (roworiented) {
6328           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6329         } else {
6330           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6331         }
6332       }
6333     }
6334   }
6335   PetscFunctionReturnVoid();
6336 }
6337 
6338 typedef struct {
6339   Mat       *mp;    /* intermediate products */
6340   PetscBool *mptmp; /* is the intermediate product temporary ? */
6341   PetscInt  cp;     /* number of intermediate products */
6342 
6343   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6344   PetscInt    *startsj_s,*startsj_r;
6345   PetscScalar *bufa;
6346   Mat         P_oth;
6347 
6348   /* may take advantage of merging product->B */
6349   Mat Bloc; /* B-local by merging diag and off-diag */
6350 
6351   /* cusparse does not have support to split between symbolic and numeric phases.
6352      When api_user is true, we don't need to update the numerical values
6353      of the temporary storage */
6354   PetscBool reusesym;
6355 
6356   /* support for COO values insertion */
6357   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6358   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6359   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6360   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6361   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6362   PetscMemType mtype;
6363 
6364   /* customization */
6365   PetscBool abmerge;
6366   PetscBool P_oth_bind;
6367 } MatMatMPIAIJBACKEND;
6368 
6369 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6370 {
6371   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6372   PetscInt            i;
6373   PetscErrorCode      ierr;
6374 
6375   PetscFunctionBegin;
6376   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6377   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6378   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6379   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6380   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6381   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6382   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6383   for (i = 0; i < mmdata->cp; i++) {
6384     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6385   }
6386   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6387   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6388   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6389   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6390   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6391   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6392   PetscFunctionReturn(0);
6393 }
6394 
6395 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6396 {
6397   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6398   PetscErrorCode ierr;
6399 
6400   PetscFunctionBegin;
6401   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6402   if (f) {
6403     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6404   } else {
6405     const PetscScalar *vv;
6406 
6407     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6408     if (n && idx) {
6409       PetscScalar    *w = v;
6410       const PetscInt *oi = idx;
6411       PetscInt       j;
6412 
6413       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6414     } else {
6415       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6416     }
6417     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6418   }
6419   PetscFunctionReturn(0);
6420 }
6421 
6422 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6423 {
6424   MatMatMPIAIJBACKEND *mmdata;
6425   PetscInt            i,n_d,n_o;
6426   PetscErrorCode      ierr;
6427 
6428   PetscFunctionBegin;
6429   MatCheckProduct(C,1);
6430   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6431   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6432   if (!mmdata->reusesym) { /* update temporary matrices */
6433     if (mmdata->P_oth) {
6434       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6435     }
6436     if (mmdata->Bloc) {
6437       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6438     }
6439   }
6440   mmdata->reusesym = PETSC_FALSE;
6441 
6442   for (i = 0; i < mmdata->cp; i++) {
6443     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6444     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6445   }
6446   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6447     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6448 
6449     if (mmdata->mptmp[i]) continue;
6450     if (noff) {
6451       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6452 
6453       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6454       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6455       n_o += noff;
6456       n_d += nown;
6457     } else {
6458       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6459 
6460       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6461       n_d += mm->nz;
6462     }
6463   }
6464   if (mmdata->hasoffproc) { /* offprocess insertion */
6465     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6466     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6467   }
6468   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6469   PetscFunctionReturn(0);
6470 }
6471 
6472 /* Support for Pt * A, A * P, or Pt * A * P */
6473 #define MAX_NUMBER_INTERMEDIATE 4
6474 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6475 {
6476   Mat_Product            *product = C->product;
6477   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6478   Mat_MPIAIJ             *a,*p;
6479   MatMatMPIAIJBACKEND    *mmdata;
6480   ISLocalToGlobalMapping P_oth_l2g = NULL;
6481   IS                     glob = NULL;
6482   const char             *prefix;
6483   char                   pprefix[256];
6484   const PetscInt         *globidx,*P_oth_idx;
6485   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6486   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6487                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6488                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6489   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6490 
6491   MatProductType         ptype;
6492   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6493   PetscMPIInt            size;
6494   PetscErrorCode         ierr;
6495 
6496   PetscFunctionBegin;
6497   MatCheckProduct(C,1);
6498   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6499   ptype = product->type;
6500   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6501   switch (ptype) {
6502   case MATPRODUCT_AB:
6503     A = product->A;
6504     P = product->B;
6505     m = A->rmap->n;
6506     n = P->cmap->n;
6507     M = A->rmap->N;
6508     N = P->cmap->N;
6509     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6510     break;
6511   case MATPRODUCT_AtB:
6512     P = product->A;
6513     A = product->B;
6514     m = P->cmap->n;
6515     n = A->cmap->n;
6516     M = P->cmap->N;
6517     N = A->cmap->N;
6518     hasoffproc = PETSC_TRUE;
6519     break;
6520   case MATPRODUCT_PtAP:
6521     A = product->A;
6522     P = product->B;
6523     m = P->cmap->n;
6524     n = P->cmap->n;
6525     M = P->cmap->N;
6526     N = P->cmap->N;
6527     hasoffproc = PETSC_TRUE;
6528     break;
6529   default:
6530     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6531   }
6532   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6533   if (size == 1) hasoffproc = PETSC_FALSE;
6534 
6535   /* defaults */
6536   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6537     mp[i]    = NULL;
6538     mptmp[i] = PETSC_FALSE;
6539     rmapt[i] = -1;
6540     cmapt[i] = -1;
6541     rmapa[i] = NULL;
6542     cmapa[i] = NULL;
6543   }
6544 
6545   /* customization */
6546   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6547   mmdata->reusesym = product->api_user;
6548   if (ptype == MATPRODUCT_AB) {
6549     if (product->api_user) {
6550       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6551       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6552       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6553       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6554     } else {
6555       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6556       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6557       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6558       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6559     }
6560   } else if (ptype == MATPRODUCT_PtAP) {
6561     if (product->api_user) {
6562       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6563       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6564       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6565     } else {
6566       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6567       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6568       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6569     }
6570   }
6571   a = (Mat_MPIAIJ*)A->data;
6572   p = (Mat_MPIAIJ*)P->data;
6573   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6574   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6575   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6576   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6577   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6578 
6579   cp   = 0;
6580   switch (ptype) {
6581   case MATPRODUCT_AB: /* A * P */
6582     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6583 
6584     /* A_diag * P_local (merged or not) */
6585     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6586       /* P is product->B */
6587       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6588       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6589       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6590       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6591       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6592       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6593       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6594       mp[cp]->product->api_user = product->api_user;
6595       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6596       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6597       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6598       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6599       rmapt[cp] = 1;
6600       cmapt[cp] = 2;
6601       cmapa[cp] = globidx;
6602       mptmp[cp] = PETSC_FALSE;
6603       cp++;
6604     } else { /* A_diag * P_diag and A_diag * P_off */
6605       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6606       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6607       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6608       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6609       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6610       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6611       mp[cp]->product->api_user = product->api_user;
6612       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6613       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6614       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6615       rmapt[cp] = 1;
6616       cmapt[cp] = 1;
6617       mptmp[cp] = PETSC_FALSE;
6618       cp++;
6619       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6620       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6621       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6622       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6623       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6624       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6625       mp[cp]->product->api_user = product->api_user;
6626       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6627       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6628       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6629       rmapt[cp] = 1;
6630       cmapt[cp] = 2;
6631       cmapa[cp] = p->garray;
6632       mptmp[cp] = PETSC_FALSE;
6633       cp++;
6634     }
6635 
6636     /* A_off * P_other */
6637     if (mmdata->P_oth) {
6638       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6639       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6640       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6641       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6642       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6643       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6644       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6645       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6646       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6647       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6648       mp[cp]->product->api_user = product->api_user;
6649       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6650       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6651       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6652       rmapt[cp] = 1;
6653       cmapt[cp] = 2;
6654       cmapa[cp] = P_oth_idx;
6655       mptmp[cp] = PETSC_FALSE;
6656       cp++;
6657     }
6658     break;
6659 
6660   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6661     /* A is product->B */
6662     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6663     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6664       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6665       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6666       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6667       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6668       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6669       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6670       mp[cp]->product->api_user = product->api_user;
6671       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6672       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6673       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6674       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6675       rmapt[cp] = 2;
6676       rmapa[cp] = globidx;
6677       cmapt[cp] = 2;
6678       cmapa[cp] = globidx;
6679       mptmp[cp] = PETSC_FALSE;
6680       cp++;
6681     } else {
6682       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6683       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6684       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6685       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6686       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6687       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6688       mp[cp]->product->api_user = product->api_user;
6689       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6690       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6691       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6692       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6693       rmapt[cp] = 1;
6694       cmapt[cp] = 2;
6695       cmapa[cp] = globidx;
6696       mptmp[cp] = PETSC_FALSE;
6697       cp++;
6698       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6699       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6700       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6701       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6702       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6703       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6704       mp[cp]->product->api_user = product->api_user;
6705       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6706       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6707       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6708       rmapt[cp] = 2;
6709       rmapa[cp] = p->garray;
6710       cmapt[cp] = 2;
6711       cmapa[cp] = globidx;
6712       mptmp[cp] = PETSC_FALSE;
6713       cp++;
6714     }
6715     break;
6716   case MATPRODUCT_PtAP:
6717     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6718     /* P is product->B */
6719     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6720     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6721     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6722     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6723     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6724     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6725     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6726     mp[cp]->product->api_user = product->api_user;
6727     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6728     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6729     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6730     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6731     rmapt[cp] = 2;
6732     rmapa[cp] = globidx;
6733     cmapt[cp] = 2;
6734     cmapa[cp] = globidx;
6735     mptmp[cp] = PETSC_FALSE;
6736     cp++;
6737     if (mmdata->P_oth) {
6738       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6739       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6740       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6741       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6742       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6743       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6744       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6745       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6746       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6747       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6748       mp[cp]->product->api_user = product->api_user;
6749       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6750       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6751       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6752       mptmp[cp] = PETSC_TRUE;
6753       cp++;
6754       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6755       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6756       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6757       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6758       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6759       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6760       mp[cp]->product->api_user = product->api_user;
6761       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6762       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6763       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6764       rmapt[cp] = 2;
6765       rmapa[cp] = globidx;
6766       cmapt[cp] = 2;
6767       cmapa[cp] = P_oth_idx;
6768       mptmp[cp] = PETSC_FALSE;
6769       cp++;
6770     }
6771     break;
6772   default:
6773     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6774   }
6775   /* sanity check */
6776   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6777 
6778   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6779   for (i = 0; i < cp; i++) {
6780     mmdata->mp[i]    = mp[i];
6781     mmdata->mptmp[i] = mptmp[i];
6782   }
6783   mmdata->cp = cp;
6784   C->product->data       = mmdata;
6785   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6786   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6787 
6788   /* memory type */
6789   mmdata->mtype = PETSC_MEMTYPE_HOST;
6790   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6791   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6792   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6793   // enable the line below once MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6794   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6795 
6796   /* prepare coo coordinates for values insertion */
6797 
6798   /* count total nonzeros of those intermediate seqaij Mats
6799     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6800     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6801     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6802   */
6803   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6804     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6805     if (mptmp[cp]) continue;
6806     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
6807       const PetscInt *rmap = rmapa[cp];
6808       const PetscInt mr = mp[cp]->rmap->n;
6809       const PetscInt rs = C->rmap->rstart;
6810       const PetscInt re = C->rmap->rend;
6811       const PetscInt *ii  = mm->i;
6812       for (i = 0; i < mr; i++) {
6813         const PetscInt gr = rmap[i];
6814         const PetscInt nz = ii[i+1] - ii[i];
6815         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6816         else ncoo_oown += nz; /* this row is local */
6817       }
6818     } else ncoo_d += mm->nz;
6819   }
6820 
6821   /*
6822     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6823 
6824     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
6825 
6826     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
6827 
6828     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
6829     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
6830     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6831 
6832     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
6833     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
6834   */
6835   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6836   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6837 
6838   /* gather (i,j) of nonzeros inserted by remote procs */
6839   if (hasoffproc) {
6840     PetscSF  msf;
6841     PetscInt ncoo2,*coo_i2,*coo_j2;
6842 
6843     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6844     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6845     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6846 
6847     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6848       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6849       PetscInt   *idxoff = mmdata->off[cp];
6850       PetscInt   *idxown = mmdata->own[cp];
6851       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6852         const PetscInt *rmap = rmapa[cp];
6853         const PetscInt *cmap = cmapa[cp];
6854         const PetscInt *ii  = mm->i;
6855         PetscInt       *coi = coo_i + ncoo_o;
6856         PetscInt       *coj = coo_j + ncoo_o;
6857         const PetscInt mr = mp[cp]->rmap->n;
6858         const PetscInt rs = C->rmap->rstart;
6859         const PetscInt re = C->rmap->rend;
6860         const PetscInt cs = C->cmap->rstart;
6861         for (i = 0; i < mr; i++) {
6862           const PetscInt *jj = mm->j + ii[i];
6863           const PetscInt gr  = rmap[i];
6864           const PetscInt nz  = ii[i+1] - ii[i];
6865           if (gr < rs || gr >= re) { /* this is an offproc row */
6866             for (j = ii[i]; j < ii[i+1]; j++) {
6867               *coi++ = gr;
6868               *idxoff++ = j;
6869             }
6870             if (!cmapt[cp]) { /* already global */
6871               for (j = 0; j < nz; j++) *coj++ = jj[j];
6872             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6873               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6874             } else { /* offdiag */
6875               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6876             }
6877             ncoo_o += nz;
6878           } else { /* this is a local row */
6879             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6880           }
6881         }
6882       }
6883       mmdata->off[cp + 1] = idxoff;
6884       mmdata->own[cp + 1] = idxown;
6885     }
6886 
6887     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6888     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6889     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6890     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6891     ncoo = ncoo_d + ncoo_oown + ncoo2;
6892     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6893     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6894     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6895     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6896     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6897     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6898     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6899     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6900     coo_i = coo_i2;
6901     coo_j = coo_j2;
6902   } else { /* no offproc values insertion */
6903     ncoo = ncoo_d;
6904     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6905 
6906     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6907     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6908     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6909   }
6910   mmdata->hasoffproc = hasoffproc;
6911 
6912    /* gather (i,j) of nonzeros inserted locally */
6913   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6914     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6915     PetscInt       *coi = coo_i + ncoo_d;
6916     PetscInt       *coj = coo_j + ncoo_d;
6917     const PetscInt *jj  = mm->j;
6918     const PetscInt *ii  = mm->i;
6919     const PetscInt *cmap = cmapa[cp];
6920     const PetscInt *rmap = rmapa[cp];
6921     const PetscInt mr = mp[cp]->rmap->n;
6922     const PetscInt rs = C->rmap->rstart;
6923     const PetscInt re = C->rmap->rend;
6924     const PetscInt cs = C->cmap->rstart;
6925 
6926     if (mptmp[cp]) continue;
6927     if (rmapt[cp] == 1) { /* consecutive rows */
6928       /* fill coo_i */
6929       for (i = 0; i < mr; i++) {
6930         const PetscInt gr = i + rs;
6931         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6932       }
6933       /* fill coo_j */
6934       if (!cmapt[cp]) { /* type-0, already global */
6935         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6936       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6937         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6938       } else { /* type-2, local to global for sparse columns */
6939         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6940       }
6941       ncoo_d += mm->nz;
6942     } else if (rmapt[cp] == 2) { /* sparse rows */
6943       for (i = 0; i < mr; i++) {
6944         const PetscInt *jj = mm->j + ii[i];
6945         const PetscInt gr  = rmap[i];
6946         const PetscInt nz  = ii[i+1] - ii[i];
6947         if (gr >= rs && gr < re) { /* local rows */
6948           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6949           if (!cmapt[cp]) { /* type-0, already global */
6950             for (j = 0; j < nz; j++) *coj++ = jj[j];
6951           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6952             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6953           } else { /* type-2, local to global for sparse columns */
6954             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6955           }
6956           ncoo_d += nz;
6957         }
6958       }
6959     }
6960   }
6961   if (glob) {
6962     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6963   }
6964   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6965   if (P_oth_l2g) {
6966     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6967   }
6968   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6969   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6970   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6971 
6972   /* preallocate with COO data */
6973   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6974   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6975   PetscFunctionReturn(0);
6976 }
6977 
6978 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6979 {
6980   Mat_Product    *product = mat->product;
6981   PetscErrorCode ierr;
6982 #if defined(PETSC_HAVE_DEVICE)
6983   PetscBool      match = PETSC_FALSE;
6984   PetscBool      usecpu = PETSC_FALSE;
6985 #else
6986   PetscBool      match = PETSC_TRUE;
6987 #endif
6988 
6989   PetscFunctionBegin;
6990   MatCheckProduct(mat,1);
6991 #if defined(PETSC_HAVE_DEVICE)
6992   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6993     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6994   }
6995   if (match) { /* we can always fallback to the CPU if requested */
6996     switch (product->type) {
6997     case MATPRODUCT_AB:
6998       if (product->api_user) {
6999         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7000         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7001         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7002       } else {
7003         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7004         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7005         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7006       }
7007       break;
7008     case MATPRODUCT_AtB:
7009       if (product->api_user) {
7010         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7011         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7012         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7013       } else {
7014         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7015         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7016         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7017       }
7018       break;
7019     case MATPRODUCT_PtAP:
7020       if (product->api_user) {
7021         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7022         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7023         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7024       } else {
7025         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7026         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7027         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7028       }
7029       break;
7030     default:
7031       break;
7032     }
7033     match = (PetscBool)!usecpu;
7034   }
7035 #endif
7036   if (match) {
7037     switch (product->type) {
7038     case MATPRODUCT_AB:
7039     case MATPRODUCT_AtB:
7040     case MATPRODUCT_PtAP:
7041       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7042       break;
7043     default:
7044       break;
7045     }
7046   }
7047   /* fallback to MPIAIJ ops */
7048   if (!mat->ops->productsymbolic) {
7049     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7050   }
7051   PetscFunctionReturn(0);
7052 }
7053