xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f2e2784ee781ab53297b5e7ed5f71c2be92534c1)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb,*aav,*bav;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92 
93   ia   = a->i;
94   ib   = b->i;
95   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
96   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) {
101       cnt++;
102       goto ok1;
103     }
104     aa = aav + ia[i];
105     for (j=0; j<na; j++) {
106       if (aa[j] != 0.0) goto ok1;
107     }
108     bb = bav + ib[i];
109     for (j=0; j <nb; j++) {
110       if (bb[j] != 0.0) goto ok1;
111     }
112     cnt++;
113 ok1:;
114   }
115   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
116   if (!n0rows) {
117     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
118     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
119     PetscFunctionReturn(0);
120   }
121   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
122   cnt  = 0;
123   for (i=0; i<m; i++) {
124     na = ia[i+1] - ia[i];
125     nb = ib[i+1] - ib[i];
126     if (!na && !nb) continue;
127     aa = aav + ia[i];
128     for (j=0; j<na;j++) {
129       if (aa[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134     bb = bav + ib[i];
135     for (j=0; j<nb; j++) {
136       if (bb[j] != 0.0) {
137         rows[cnt++] = rstart + i;
138         goto ok2;
139       }
140     }
141 ok2:;
142   }
143   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
145   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
150 {
151   PetscErrorCode    ierr;
152   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
153   PetscBool         cong;
154 
155   PetscFunctionBegin;
156   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
157   if (Y->assembled && cong) {
158     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
159   } else {
160     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
161   }
162   PetscFunctionReturn(0);
163 }
164 
165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
166 {
167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
168   PetscErrorCode ierr;
169   PetscInt       i,rstart,nrows,*rows;
170 
171   PetscFunctionBegin;
172   *zrows = NULL;
173   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
174   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
175   for (i=0; i<nrows; i++) rows[i] += rstart;
176   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
181 {
182   PetscErrorCode    ierr;
183   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
184   PetscInt          i,n,*garray = aij->garray;
185   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
186   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
187   PetscReal         *work;
188   const PetscScalar *dummy;
189 
190   PetscFunctionBegin;
191   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
192   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
193   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
197   if (type == NORM_2) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
203     }
204   } else if (type == NORM_1) {
205     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
206       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
207     }
208     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
209       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
210     }
211   } else if (type == NORM_INFINITY) {
212     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
213       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
214     }
215     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
216       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
217     }
218 
219   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
220   if (type == NORM_INFINITY) {
221     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
222   } else {
223     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
224   }
225   ierr = PetscFree(work);CHKERRQ(ierr);
226   if (type == NORM_2) {
227     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
233 {
234   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
235   IS              sis,gis;
236   PetscErrorCode  ierr;
237   const PetscInt  *isis,*igis;
238   PetscInt        n,*iis,nsis,ngis,rstart,i;
239 
240   PetscFunctionBegin;
241   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
242   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
243   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
244   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
245   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
246   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
247 
248   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
249   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
250   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
251   n    = ngis + nsis;
252   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
253   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
254   for (i=0; i<n; i++) iis[i] += rstart;
255   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
256 
257   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
258   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
259   ierr = ISDestroy(&sis);CHKERRQ(ierr);
260   ierr = ISDestroy(&gis);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /*
265   Local utility routine that creates a mapping from the global column
266 number to the local number in the off-diagonal part of the local
267 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
268 a slightly higher hash table cost; without it it is not scalable (each processor
269 has an order N integer array but is fast to access.
270 */
271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
272 {
273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       n = aij->B->cmap->n,i;
276 
277   PetscFunctionBegin;
278   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
279 #if defined(PETSC_USE_CTABLE)
280   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
281   for (i=0; i<n; i++) {
282     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
283   }
284 #else
285   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
286   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
287   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
288 #endif
289   PetscFunctionReturn(0);
290 }
291 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into the current row of the
   diagonal block. Meant to be expanded inside a function that provides the locals it uses.

   Arguments:
+  row,col   - local row and local column
.  value     - value to store or add
.  addv      - ADD_VALUES accumulates into an existing entry, anything else overwrites it
-  orow,ocol - global row/column, used only in the error message

   Locals expected in the enclosing scope:
   rp1/ap1 (column indices and values of the row), nrow1 (current row length), rmax1,
   low1/high1/lastcol1 (search window carried over between calls), t, _i, N, nonew,
   ignorezeroentries, inserted, ierr, A, a, am, aa, ai, aj, aimax, ailen.

   Flow:
   1. The window [low1,high1) is reset on one side depending on whether col is at or below
      the previously searched column, narrowed by bisection until it holds at most 5
      entries, then scanned linearly.
   2. If col is found the value is added/overwritten, `inserted` is set, and control jumps
      to a_noinsert.
   3. Otherwise the entry is skipped when value is zero with ignorezeroentries set and the
      entry is off the diagonal (row != col), or when nonew == 1; nonew == -1 raises an
      error.
   4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are
      shifted up by one, the new entry is written at position _i and A->nonzerostate is
      incremented.
   5. ailen[row] is updated with nrow1 on every path that reaches a_noinsert.

   The label a_noinsert restricts the macro to one expansion per function.
*/
292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
293 { \
294     if (col <= lastcol1)  low1 = 0;     \
295     else                 high1 = nrow1; \
296     lastcol1 = col;\
297     while (high1-low1 > 5) { \
298       t = (low1+high1)/2; \
299       if (rp1[t] > col) high1 = t; \
300       else              low1  = t; \
301     } \
302       for (_i=low1; _i<high1; _i++) { \
303         if (rp1[_i] > col) break; \
304         if (rp1[_i] == col) { \
305           if (addv == ADD_VALUES) { \
306             ap1[_i] += value;   \
307             /* Not sure LogFlops will slow down the code or not */ \
308             (void)PetscLogFlops(1.0);   \
309            } \
310           else                    ap1[_i] = value; \
311           inserted = PETSC_TRUE; \
312           goto a_noinsert; \
313         } \
314       }  \
315       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
316       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
317       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
318       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
319       N = nrow1++ - 1; a->nz++; high1++; \
320       /* shift up all the later entries in this row */ \
321       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
322       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
323       rp1[_i] = col;  \
324       ap1[_i] = value;  \
325       A->nonzerostate++;\
326       a_noinsert: ; \
327       ailen[row] = nrow1; \
328 }
329 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into the current row of the
   off-diagonal block. Meant to be expanded inside a function that provides the locals it uses.

   Arguments:
+  row,col   - local row and column index as stored in the off-diagonal block
.  value     - value to store or add
.  addv      - ADD_VALUES accumulates into an existing entry, anything else overwrites it
-  orow,ocol - global row/column, used only in the error message

   Locals expected in the enclosing scope:
   rp2/ap2 (column indices and values of the row), nrow2 (current row length), rmax2,
   low2/high2/lastcol2 (search window carried over between calls), t, _i, N, nonew,
   ignorezeroentries, inserted, ierr, B, b, bm, ba, bi, bj, bimax, bilen.

   The flow mirrors MatSetValues_SeqAIJ_A_Private with one difference: a zero value is
   skipped whenever ignorezeroentries is set, without the row != col exception used for
   the diagonal block. New entries go through MatSeqXAIJReallocateAIJ(), are inserted at
   position _i after shifting the later entries, and increment B->nonzerostate.
   bilen[row] is updated with nrow2 on every path that reaches b_noinsert.

   The label b_noinsert restricts the macro to one expansion per function.
*/
330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
331   { \
332     if (col <= lastcol2) low2 = 0;                        \
333     else high2 = nrow2;                                   \
334     lastcol2 = col;                                       \
335     while (high2-low2 > 5) {                              \
336       t = (low2+high2)/2;                                 \
337       if (rp2[t] > col) high2 = t;                        \
338       else             low2  = t;                         \
339     }                                                     \
340     for (_i=low2; _i<high2; _i++) {                       \
341       if (rp2[_i] > col) break;                           \
342       if (rp2[_i] == col) {                               \
343         if (addv == ADD_VALUES) {                         \
344           ap2[_i] += value;                               \
345           (void)PetscLogFlops(1.0);                       \
346         }                                                 \
347         else                    ap2[_i] = value;          \
348         inserted = PETSC_TRUE;                            \
349         goto b_noinsert;                                  \
350       }                                                   \
351     }                                                     \
352     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
353     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
354     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
355     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
356     N = nrow2++ - 1; b->nz++; high2++;                    \
357     /* shift up all the later entries in this row */      \
358     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
359     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
360     rp2[_i] = col;                                        \
361     ap2[_i] = value;                                      \
362     B->nonzerostate++;                                    \
363     b_noinsert: ;                                         \
364     bilen[row] = nrow2;                                   \
365   }
366 
367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
368 {
369   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
370   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
371   PetscErrorCode ierr;
372   PetscInt       l,*garray = mat->garray,diag;
373 
374   PetscFunctionBegin;
375   /* code only works for square matrices A */
376 
377   /* find size of row to the left of the diagonal part */
378   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
379   row  = row - diag;
380   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
381     if (garray[b->j[b->i[row]+l]] > diag) break;
382   }
383   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
384 
385   /* diagonal part */
386   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
387 
388   /* right of diagonal part */
389   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
390 #if defined(PETSC_HAVE_DEVICE)
391   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
392 #endif
393   PetscFunctionReturn(0);
394 }
395 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values into an MPIAIJ matrix.

   Input Parameters:
+  mat  - the MPIAIJ matrix
.  m    - number of rows in im[]
.  im   - global row indices; negative rows are skipped, rows >= mat->rmap->N raise an error
.  n    - number of columns in in[]
.  in   - global column indices; negative columns are skipped, columns >= mat->cmap->N raise an error
.  v    - values, read as v[i*n+j] when aij->roworiented is set and as v[i+j*m] otherwise;
          when v is NULL the value 0.0 is used for every entry
-  addv - ADD_VALUES or another insert mode, forwarded to the per-entry macros

   Notes:
   Locally owned rows are handled entry by entry: columns in [cstart,cend) go to the
   diagonal block through MatSetValues_SeqAIJ_A_Private, all other valid columns go to the
   off-diagonal block through MatSetValues_SeqAIJ_B_Private.

   Rows owned by another rank are put into mat->stash (unless aij->donotstash is set) and
   mat->assembled is cleared; with mat->nooffprocentries set such rows raise an error.

   The large set of locals below exists because the two macros reference them by name.
*/
396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
397 {
398   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
399   PetscScalar    value = 0.0;
400   PetscErrorCode ierr;
401   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
402   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
403   PetscBool      roworiented = aij->roworiented;
404 
405   /* Some Variables required in the macro */
406   Mat        A                    = aij->A;
407   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
408   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
409   PetscBool  ignorezeroentries    = a->ignorezeroentries;
410   Mat        B                    = aij->B;
411   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
412   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
413   MatScalar  *aa,*ba;
414   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
415    * cannot use "#if defined" inside a macro. */
416   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
417 
418   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
419   PetscInt  nonew;
420   MatScalar *ap1,*ap2;
421 
422   PetscFunctionBegin;
423 #if defined(PETSC_HAVE_DEVICE)
  /* NOTE(review): the Get/Restore pairs below discard the pointer; presumably they bring the
     host copy of the values up to date before a->a / b->a are written directly - confirm */
424   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
425     const PetscScalar *dummy;
426     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
427     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
428   }
429   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
430     const PetscScalar *dummy;
431     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
432     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
433   }
434 #endif
435   aa = a->a;
436   ba = b->a;
437   for (i=0; i<m; i++) {
438     if (im[i] < 0) continue;
439     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
440     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state consumed by both macros */
441       row      = im[i] - rstart;
442       lastcol1 = -1;
443       rp1      = aj + ai[row];
444       ap1      = aa + ai[row];
445       rmax1    = aimax[row];
446       nrow1    = ailen[row];
447       low1     = 0;
448       high1    = nrow1;
449       lastcol2 = -1;
450       rp2      = bj + bi[row];
451       ap2      = ba + bi[row];
452       rmax2    = bimax[row];
453       nrow2    = bilen[row];
454       low2     = 0;
455       high2    = nrow2;
456 
457       for (j=0; j<n; j++) {
458         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* when adding with ignorezeroentries set, zero values off the diagonal are dropped */
459         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
460         if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block; use the local column number */
461           col   = in[j] - cstart;
462           nonew = a->nonew;
463           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
464 #if defined(PETSC_HAVE_DEVICE)
465           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
466 #endif
467         } else if (in[j] < 0) continue;
468         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
469         else {
          /* column belongs to the off-diagonal block */
470           if (mat->was_assembled) {
            /* translate the global column through colmap (created on first use);
               stored values are shifted by one, so col becomes -1 when the column is absent */
471             if (!aij->colmap) {
472               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
473             }
474 #if defined(PETSC_USE_CTABLE)
475             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
476             col--;
477 #else
478             col = aij->colmap[in[j]] - 1;
479 #endif
480             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new column and new nonzeros are allowed: disassemble, then insert using the
                 global column number */
481               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
482               col  =  in[j];
483               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
484               B        = aij->B;
485               b        = (Mat_SeqAIJ*)B->data;
486               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
487               rp2      = bj + bi[row];
488               ap2      = ba + bi[row];
489               rmax2    = bimax[row];
490               nrow2    = bilen[row];
491               low2     = 0;
492               high2    = nrow2;
493               bm       = aij->B->rmap->n;
494               ba       = b->a;
495               inserted = PETSC_FALSE;
496             } else if (col < 0) {
              /* new column but new nonzeros are restricted: nonew == 1 only logs (the macro
                 below then skips the insertion), any other nonzero setting is an error */
497               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
498                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
499               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
500             }
501           } else col = in[j];
502           nonew = b->nonew;
503           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
504 #if defined(PETSC_HAVE_DEVICE)
505           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
506 #endif
507         }
508       }
509     } else {
      /* row owned by another rank: stash it unless stashing is disabled */
510       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
526     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
527     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
528 */
529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
530 {
531   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
532   Mat            A           = aij->A; /* diagonal part of the matrix */
533   Mat            B           = aij->B; /* offdiagonal part of the matrix */
534   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
535   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
536   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
537   PetscInt       *ailen      = a->ilen,*aj = a->j;
538   PetscInt       *bilen      = b->ilen,*bj = b->j;
539   PetscInt       am          = aij->A->rmap->n,j;
540   PetscInt       diag_so_far = 0,dnz;
541   PetscInt       offd_so_far = 0,onz;
542 
543   PetscFunctionBegin;
544   /* Iterate over all rows of the matrix */
545   for (j=0; j<am; j++) {
546     dnz = onz = 0;
547     /*  Iterate over all non-zero columns of the current row */
548     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
549       /* If column is in the diagonal */
550       if (mat_j[col] >= cstart && mat_j[col] < cend) {
551         aj[diag_so_far++] = mat_j[col] - cstart;
552         dnz++;
553       } else { /* off-diagonal entries */
554         bj[offd_so_far++] = mat_j[col];
555         onz++;
556       }
557     }
558     ailen[j] = dnz;
559     bilen[j] = onz;
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 /*
565     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
566     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
567     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
568     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
569     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
570 */
571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
572 {
573   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
574   Mat            A      = aij->A; /* diagonal part of the matrix */
575   Mat            B      = aij->B; /* offdiagonal part of the matrix */
576   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
577   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
578   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
579   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
580   PetscInt       *ailen = a->ilen,*aj = a->j;
581   PetscInt       *bilen = b->ilen,*bj = b->j;
582   PetscInt       am     = aij->A->rmap->n,j;
583   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
584   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
585   PetscScalar    *aa = a->a,*ba = b->a;
586 
587   PetscFunctionBegin;
588   /* Iterate over all rows of the matrix */
589   for (j=0; j<am; j++) {
590     dnz_row = onz_row = 0;
591     rowstart_offd = full_offd_i[j];
592     rowstart_diag = full_diag_i[j];
593     /*  Iterate over all non-zero columns of the current row */
594     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
595       /* If column is in the diagonal */
596       if (mat_j[col] >= cstart && mat_j[col] < cend) {
597         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
598         aa[rowstart_diag+dnz_row] = mat_a[col];
599         dnz_row++;
600       } else { /* off-diagonal entries */
601         bj[rowstart_offd+onz_row] = mat_j[col];
602         ba[rowstart_offd+onz_row] = mat_a[col];
603         onz_row++;
604       }
605     }
606     ailen[j] = dnz_row;
607     bilen[j] = onz_row;
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
613 {
614   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
615   PetscErrorCode ierr;
616   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
617   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
618 
619   PetscFunctionBegin;
620   for (i=0; i<m; i++) {
621     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
622     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
623     if (idxm[i] >= rstart && idxm[i] < rend) {
624       row = idxm[i] - rstart;
625       for (j=0; j<n; j++) {
626         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
627         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
628         if (idxn[j] >= cstart && idxn[j] < cend) {
629           col  = idxn[j] - cstart;
630           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
631         } else {
632           if (!aij->colmap) {
633             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
634           }
635 #if defined(PETSC_USE_CTABLE)
636           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
637           col--;
638 #else
639           col = aij->colmap[idxn[j]] - 1;
640 #endif
641           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
642           else {
643             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
644           }
645         }
646       }
647     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
648   }
649   PetscFunctionReturn(0);
650 }
651 
652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   PetscErrorCode ierr;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
/*
   MatAssemblyEnd_MPIAIJ - Completes the assembly of an MPIAIJ matrix.

   Input Parameters:
+  mat  - the MPIAIJ matrix
-  mode - assembly type, forwarded to the assembly of both sequential blocks

   Steps performed, in order:
   1. Unless stashing is disabled, receive the stashed entries message by message and insert
      each run of consecutive entries with the same row through MatSetValues_MPIAIJ().
   2. Assemble the diagonal block.
   3. If the off-diagonal block may gain new nonzeros, reduce was_assembled over the
      communicator; a rank that is still assembled while another is not disassembles itself.
   4. On the first final assembly call MatSetUpMultiply_MPIAIJ().
   5. Disable inodes on the off-diagonal block and assemble it.
   6. Free the cached row work arrays and the cached diagonal vector.
   7. Recompute mat->nonzerostate as the global sum of the block states when this is the
      first final assembly or the diagonal block allows new nonzeros.
*/
667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscErrorCode ierr;
671   PetscMPIInt    n;
672   PetscInt       i,j,rstart,ncols,flg;
673   PetscInt       *row,*col;
674   PetscBool      other_disassembled;
675   PetscScalar    *val;
676 
677   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
678 
679   PetscFunctionBegin;
680   if (!aij->donotstash && !mat->nooffprocentries) {
681     while (1) {
      /* flg == 0 signals that no further stash message is available */
682       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
683       if (!flg) break;
684 
685       for (i=0; i<n;) {
686         /* Now identify the consecutive vals belonging to the same row */
        /* note: rstart is reused here to hold the row number of the current run */
687         for (j=i,rstart=row[j]; j<n; j++) {
688           if (row[j] != rstart) break;
689         }
690         if (j < n) ncols = j-i;
691         else       ncols = n-i;
692         /* Now assemble all these values with a single function call */
693         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
694         i    = j;
695       }
696     }
697     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_HAVE_DEVICE)
700   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
701   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
702   if (mat->boundtocpu) {
703     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
704     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
705   }
706 #endif
707   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
708   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
709 
710   /* determine if any processor has disassembled, if so we must
711      also disassemble ourself, in order that we may reassemble. */
712   /*
713      if nonzero structure of submatrix B cannot change then we know that
714      no processor disassembled thus we can skip this stuff
715   */
716   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* product of the was_assembled flags: the result is false as soon as one rank is not assembled */
717     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
718     if (mat->was_assembled && !other_disassembled) {
719 #if defined(PETSC_HAVE_DEVICE)
720       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
721 #endif
722       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
723     }
724   }
725   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
726     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
727   }
728   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
729 #if defined(PETSC_HAVE_DEVICE)
730   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
731 #endif
732   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
733   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
734 
  /* drop cached per-row work arrays and the cached diagonal vector */
735   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
736 
737   aij->rowvalues = NULL;
738 
739   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
740 
741   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
742   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
743     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
744     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745   }
746 #if defined(PETSC_HAVE_DEVICE)
747   mat->offloadmask = PETSC_OFFLOAD_BOTH;
748 #endif
749   PetscFunctionReturn(0);
750 }
751 
752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
753 {
754   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
759   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
764 {
765   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
766   PetscObjectState sA, sB;
767   PetscInt        *lrows;
768   PetscInt         r, len;
769   PetscBool        cong, lch, gch;
770   PetscErrorCode   ierr;
771 
772   PetscFunctionBegin;
773   /* get locally owned rows */
774   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
775   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
776   /* fix right hand side if needed */
777   if (x && b) {
778     const PetscScalar *xx;
779     PetscScalar       *bb;
780 
781     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788 
789   sA = mat->A->nonzerostate;
790   sB = mat->B->nonzerostate;
791 
792   if (diag != 0.0 && cong) {
793     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
794     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
796     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
797     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
798     PetscInt   nnwA, nnwB;
799     PetscBool  nnzA, nnzB;
800 
801     nnwA = aijA->nonew;
802     nnwB = aijB->nonew;
803     nnzA = aijA->keepnonzeropattern;
804     nnzB = aijB->keepnonzeropattern;
805     if (!nnzA) {
806       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
807       aijA->nonew = 0;
808     }
809     if (!nnzB) {
810       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
811       aijB->nonew = 0;
812     }
813     /* Must zero here before the next loop */
814     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
815     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     for (r = 0; r < len; ++r) {
817       const PetscInt row = lrows[r] + A->rmap->rstart;
818       if (row >= A->cmap->N) continue;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     aijA->nonew = nnwA;
822     aijB->nonew = nnwB;
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(lrows);CHKERRQ(ierr);
828   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830 
831   /* reduce nonzerostate */
832   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
833   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
834   if (gch) A->nonzerostate++;
835   PetscFunctionReturn(0);
836 }
837 
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscMPIInt       p = 0;
846   PetscSFNode       *rrows;
847   PetscSF           sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb,*mask;
850   Vec               xmask,lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
852   const PetscInt    *aj, *ii,*ridx;
853   PetscScalar       *aa;
854 
855   PetscFunctionBegin;
856   /* Create SF where leaves are input rows and roots are owned rows */
857   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
858   for (r = 0; r < n; ++r) lrows[r] = -1;
859   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
860   for (r = 0; r < N; ++r) {
861     const PetscInt idx   = rows[r];
862     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
863     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
864       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
865     }
866     rrows[r].rank  = p;
867     rrows[r].index = rows[r] - owners[p];
868   }
869   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
870   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
871   /* Collect flags for rows to be zeroed */
872   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
874   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
875   /* Compress and put in row numbers */
876   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
877   /* zero diagonal part of matrix */
878   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
879   /* handle off diagonal part of matrix */
880   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
881   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
882   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
883   for (i=0; i<len; i++) bb[lrows[i]] = 1;
884   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
885   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
888   if (x && b) { /* this code is buggy when the row and column layout don't match */
889     PetscBool cong;
890 
891     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
892     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
893     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
896     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
897   }
898   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
899   /* remove zeroed rows of off diagonal matrix */
900   ii = aij->i;
901   for (i=0; i<len; i++) {
902     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
903   }
904   /* loop over all elements of off process part of matrix zeroing removed columns*/
905   if (aij->compressedrow.use) {
906     m    = aij->compressedrow.nrows;
907     ii   = aij->compressedrow.i;
908     ridx = aij->compressedrow.rindex;
909     for (i=0; i<m; i++) {
910       n  = ii[i+1] - ii[i];
911       aj = aij->j + ii[i];
912       aa = aij->a + ii[i];
913 
914       for (j=0; j<n; j++) {
915         if (PetscAbsScalar(mask[*aj])) {
916           if (b) bb[*ridx] -= *aa*xx[*aj];
917           *aa = 0.0;
918         }
919         aa++;
920         aj++;
921       }
922       ridx++;
923     }
924   } else { /* do not use compressed row format */
925     m = l->B->rmap->n;
926     for (i=0; i<m; i++) {
927       n  = ii[i+1] - ii[i];
928       aj = aij->j + ii[i];
929       aa = aij->a + ii[i];
930       for (j=0; j<n; j++) {
931         if (PetscAbsScalar(mask[*aj])) {
932           if (b) bb[i] -= *aa*xx[*aj];
933           *aa = 0.0;
934         }
935         aa++;
936         aj++;
937       }
938     }
939   }
940   if (x && b) {
941     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
942     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
943   }
944   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
945   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
946   ierr = PetscFree(lrows);CHKERRQ(ierr);
947 
948   /* only change matrix nonzero state if pattern was allowed to be changed */
949   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
950     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
951     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
957 {
958   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
959   PetscErrorCode ierr;
960   PetscInt       nt;
961   VecScatter     Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
965   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
966   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
968   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
974 {
975   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   VecScatter     Mvctx = a->Mvctx;
988 
989   PetscFunctionBegin;
990   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
992   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001 
1002   PetscFunctionBegin;
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   /* do local part */
1006   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007   /* add partial results together */
1008   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1014 {
1015   MPI_Comm       comm;
1016   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1017   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1018   IS             Me,Notme;
1019   PetscErrorCode ierr;
1020   PetscInt       M,N,first,last,*notme,i;
1021   PetscBool      lf;
1022   PetscMPIInt    size;
1023 
1024   PetscFunctionBegin;
1025   /* Easy test: symmetric diagonal block */
1026   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1027   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1028   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1029   if (!*f) PetscFunctionReturn(0);
1030   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1031   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1032   if (size == 1) PetscFunctionReturn(0);
1033 
1034   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1035   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1036   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1037   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1038   for (i=0; i<first; i++) notme[i] = i;
1039   for (i=last; i<M; i++) notme[i-last+first] = i;
1040   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1041   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1042   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1043   Aoff = Aoffs[0];
1044   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1045   Boff = Boffs[0];
1046   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1047   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1048   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1049   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1050   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1051   ierr = PetscFree(notme);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1056 {
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1065 {
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   /* do nondiagonal part */
1071   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1072   /* do local part */
1073   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1074   /* add partial results together */
1075   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /*
1081   This only works correctly for square matrices where the subblock A->A is the
1082    diagonal block
1083 */
1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1085 {
1086   PetscErrorCode ierr;
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088 
1089   PetscFunctionBegin;
1090   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1091   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1092   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1103   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113 #if defined(PETSC_USE_LOG)
1114   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1115 #endif
1116   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1118   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1119   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1120 #if defined(PETSC_USE_CTABLE)
1121   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1122 #else
1123   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1124 #endif
1125   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1126   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1127   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1128   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1129   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1130   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1131 
1132   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1133   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1134 
1135   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1145 #if defined(PETSC_HAVE_CUDA)
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1147 #endif
1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1150 #endif
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154 #if defined(PETSC_HAVE_SCALAPACK)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1160 #endif
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1167 #if defined(PETSC_HAVE_MKL_SPARSE)
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1177 {
1178   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1179   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1180   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1181   const PetscInt    *garray = aij->garray;
1182   const PetscScalar *aa,*ba;
1183   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1184   PetscInt          *rowlens;
1185   PetscInt          *colidxs;
1186   PetscScalar       *matvals;
1187   PetscErrorCode    ierr;
1188 
1189   PetscFunctionBegin;
1190   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1191 
1192   M  = mat->rmap->N;
1193   N  = mat->cmap->N;
1194   m  = mat->rmap->n;
1195   rs = mat->rmap->rstart;
1196   cs = mat->cmap->rstart;
1197   nz = A->nz + B->nz;
1198 
1199   /* write matrix header */
1200   header[0] = MAT_FILE_CLASSID;
1201   header[1] = M; header[2] = N; header[3] = nz;
1202   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1203   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1204 
1205   /* fill in and store row lengths  */
1206   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1207   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1208   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1209   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1210 
1211   /* fill in and store column indices */
1212   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1213   for (cnt=0, i=0; i<m; i++) {
1214     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1215       if (garray[B->j[jb]] > cs) break;
1216       colidxs[cnt++] = garray[B->j[jb]];
1217     }
1218     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1219       colidxs[cnt++] = A->j[ja] + cs;
1220     for (; jb<B->i[i+1]; jb++)
1221       colidxs[cnt++] = garray[B->j[jb]];
1222   }
1223   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1224   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1225   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1226 
1227   /* fill in and store nonzero values */
1228   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1229   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1230   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1231   for (cnt=0, i=0; i<m; i++) {
1232     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1233       if (garray[B->j[jb]] > cs) break;
1234       matvals[cnt++] = ba[jb];
1235     }
1236     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1237       matvals[cnt++] = aa[ja];
1238     for (; jb<B->i[i+1]; jb++)
1239       matvals[cnt++] = ba[jb];
1240   }
1241   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1242   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1243   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1244   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1245   ierr = PetscFree(matvals);CHKERRQ(ierr);
1246 
1247   /* write block size option to the viewer's .info file */
1248   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #include <petscdraw.h>
1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1254 {
1255   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1256   PetscErrorCode    ierr;
1257   PetscMPIInt       rank = aij->rank,size = aij->size;
1258   PetscBool         isdraw,iascii,isbinary;
1259   PetscViewer       sviewer;
1260   PetscViewerFormat format;
1261 
1262   PetscFunctionBegin;
1263   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1264   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1265   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1266   if (iascii) {
1267     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1268     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1269       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1270       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1271       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1272       for (i=0; i<(PetscInt)size; i++) {
1273         nmax = PetscMax(nmax,nz[i]);
1274         nmin = PetscMin(nmin,nz[i]);
1275         navg += nz[i];
1276       }
1277       ierr = PetscFree(nz);CHKERRQ(ierr);
1278       navg = navg/size;
1279       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1280       PetscFunctionReturn(0);
1281     }
1282     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1283     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1284       MatInfo   info;
1285       PetscBool inodes;
1286 
1287       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1288       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1289       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1290       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1291       if (!inodes) {
1292         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1293                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1294       } else {
1295         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1296                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1297       }
1298       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1299       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1300       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1301       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1302       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1304       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1305       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1306       PetscFunctionReturn(0);
1307     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1308       PetscInt inodecount,inodelimit,*inodes;
1309       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1310       if (inodes) {
1311         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1312       } else {
1313         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1314       }
1315       PetscFunctionReturn(0);
1316     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1317       PetscFunctionReturn(0);
1318     }
1319   } else if (isbinary) {
1320     if (size == 1) {
1321       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1322       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1323     } else {
1324       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1325     }
1326     PetscFunctionReturn(0);
1327   } else if (iascii && size == 1) {
1328     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   { /* assemble the entire matrix onto first processor */
1340     Mat A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1344     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1345     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1346     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1347 /*  The commented code uses MatCreateSubMatrices instead */
1348 /*
1349     Mat *AA, A = NULL, Av;
1350     IS  isrow,iscol;
1351 
1352     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1353     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1354     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1355     if (!rank) {
1356        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1357        A    = AA[0];
1358        Av   = AA[0];
1359     }
1360     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1361 */
1362     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1363     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1364     /*
1365        Everyone has to call to draw the matrix since the graphics waits are
1366        synchronized across all processors that share the PetscDraw object
1367     */
1368     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1369     if (!rank) {
1370       if (((PetscObject)mat)->name) {
1371         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1372       }
1373       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1376     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1377     ierr = MatDestroy(&A);CHKERRQ(ierr);
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1383 {
1384   PetscErrorCode ierr;
1385   PetscBool      iascii,isdraw,issocket,isbinary;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1392   if (iascii || isdraw || isbinary || issocket) {
1393     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1399 {
1400   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1401   PetscErrorCode ierr;
1402   Vec            bb1 = NULL;
1403   PetscBool      hasop;
1404 
1405   PetscFunctionBegin;
1406   if (flag == SOR_APPLY_UPPER) {
1407     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1408     PetscFunctionReturn(0);
1409   }
1410 
1411   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1412     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1413   }
1414 
1415   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1416     if (flag & SOR_ZERO_INITIAL_GUESS) {
1417       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1418       its--;
1419     }
1420 
1421     while (its--) {
1422       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424 
1425       /* update rhs: bb1 = bb - B*x */
1426       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1427       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1428 
1429       /* local sweep */
1430       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1431     }
1432   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1433     if (flag & SOR_ZERO_INITIAL_GUESS) {
1434       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1435       its--;
1436     }
1437     while (its--) {
1438       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440 
1441       /* update rhs: bb1 = bb - B*x */
1442       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1443       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1444 
1445       /* local sweep */
1446       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1447     }
1448   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1449     if (flag & SOR_ZERO_INITIAL_GUESS) {
1450       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451       its--;
1452     }
1453     while (its--) {
1454       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1455       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1456 
1457       /* update rhs: bb1 = bb - B*x */
1458       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1459       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1460 
1461       /* local sweep */
1462       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1463     }
1464   } else if (flag & SOR_EISENSTAT) {
1465     Vec xx1;
1466 
1467     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1468     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1469 
1470     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472     if (!mat->diag) {
1473       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1474       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1475     }
1476     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1477     if (hasop) {
1478       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1479     } else {
1480       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1481     }
1482     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1483 
1484     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1485 
1486     /* local sweep */
1487     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1488     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1489     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1490   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1491 
1492   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1493 
1494   matin->factorerrortype = mat->A->factorerrortype;
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1499 {
1500   Mat            aA,aB,Aperm;
1501   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1502   PetscScalar    *aa,*ba;
1503   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1504   PetscSF        rowsf,sf;
1505   IS             parcolp = NULL;
1506   PetscBool      done;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1511   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1512   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1513   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1514 
1515   /* Invert row permutation to find out where my rows should go */
1516   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1517   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1518   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1519   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1520   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1521   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1522 
1523   /* Invert column permutation to find out where my columns should go */
1524   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1525   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1526   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1527   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1528   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1529   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1531 
1532   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1533   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1534   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1535 
1536   /* Find out where my gcols should go */
1537   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1538   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1539   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1540   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1541   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1542   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1543   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1544   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1545 
1546   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1547   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1548   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1549   for (i=0; i<m; i++) {
1550     PetscInt    row = rdest[i];
1551     PetscMPIInt rowner;
1552     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1553     for (j=ai[i]; j<ai[i+1]; j++) {
1554       PetscInt    col = cdest[aj[j]];
1555       PetscMPIInt cowner;
1556       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1557       if (rowner == cowner) dnnz[i]++;
1558       else onnz[i]++;
1559     }
1560     for (j=bi[i]; j<bi[i+1]; j++) {
1561       PetscInt    col = gcdest[bj[j]];
1562       PetscMPIInt cowner;
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567   }
1568   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1569   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1570   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1571   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1572   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1573 
1574   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1575   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1576   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1577   for (i=0; i<m; i++) {
1578     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1579     PetscInt j0,rowlen;
1580     rowlen = ai[i+1] - ai[i];
1581     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1582       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1583       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1584     }
1585     rowlen = bi[i+1] - bi[i];
1586     for (j0=j=0; j<rowlen; j0=j) {
1587       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1588       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1589     }
1590   }
1591   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1592   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1593   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1594   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1595   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1596   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1597   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1598   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1599   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1600   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1601   *B = Aperm;
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1606 {
1607   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscErrorCode ierr;
1621   PetscLogDouble isend[5],irecv[5];
1622 
1623   PetscFunctionBegin;
1624   info->block_size = 1.0;
1625   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1626 
1627   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1628   isend[3] = info->memory;  isend[4] = info->mallocs;
1629 
1630   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1631 
1632   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;  isend[4] += info->mallocs;
1634   if (flag == MAT_LOCAL) {
1635     info->nz_used      = isend[0];
1636     info->nz_allocated = isend[1];
1637     info->nz_unneeded  = isend[2];
1638     info->memory       = isend[3];
1639     info->mallocs      = isend[4];
1640   } else if (flag == MAT_GLOBAL_MAX) {
1641     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1642 
1643     info->nz_used      = irecv[0];
1644     info->nz_allocated = irecv[1];
1645     info->nz_unneeded  = irecv[2];
1646     info->memory       = irecv[3];
1647     info->mallocs      = irecv[4];
1648   } else if (flag == MAT_GLOBAL_SUM) {
1649     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   }
1657   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1658   info->fill_ratio_needed = 0;
1659   info->factor_mallocs    = 0;
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1664 {
1665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   switch (op) {
1670   case MAT_NEW_NONZERO_LOCATIONS:
1671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1672   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1673   case MAT_KEEP_NONZERO_PATTERN:
1674   case MAT_NEW_NONZERO_LOCATION_ERR:
1675   case MAT_USE_INODES:
1676   case MAT_IGNORE_ZERO_ENTRIES:
1677     MatCheckPreallocated(A,1);
1678     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1679     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1680     break;
1681   case MAT_ROW_ORIENTED:
1682     MatCheckPreallocated(A,1);
1683     a->roworiented = flg;
1684 
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1687     break;
1688   case MAT_FORCE_DIAGONAL_ENTRIES:
1689   case MAT_SORTED_FULL:
1690     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1691     break;
1692   case MAT_IGNORE_OFF_PROC_ENTRIES:
1693     a->donotstash = flg;
1694     break;
1695   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1696   case MAT_SPD:
1697   case MAT_SYMMETRIC:
1698   case MAT_STRUCTURALLY_SYMMETRIC:
1699   case MAT_HERMITIAN:
1700   case MAT_SYMMETRY_ETERNAL:
1701     break;
1702   case MAT_SUBMAT_SINGLEIS:
1703     A->submat_singleis = flg;
1704     break;
1705   case MAT_STRUCTURE_ONLY:
1706     /* The option is handled directly by MatSetOption() */
1707     break;
1708   default:
1709     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1710   }
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1715 {
1716   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1717   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1718   PetscErrorCode ierr;
1719   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1720   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1721   PetscInt       *cmap,*idx_p;
1722 
1723   PetscFunctionBegin;
1724   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1725   mat->getrowactive = PETSC_TRUE;
1726 
1727   if (!mat->rowvalues && (idx || v)) {
1728     /*
1729         allocate enough space to hold information from the longest row.
1730     */
1731     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1732     PetscInt   max = 1,tmp;
1733     for (i=0; i<matin->rmap->n; i++) {
1734       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1735       if (max < tmp) max = tmp;
1736     }
1737     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1738   }
1739 
1740   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1741   lrow = row - rstart;
1742 
1743   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1744   if (!v)   {pvA = NULL; pvB = NULL;}
1745   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1746   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1747   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1748   nztot = nzA + nzB;
1749 
1750   cmap = mat->garray;
1751   if (v  || idx) {
1752     if (nztot) {
1753       /* Sort by increasing column numbers, assuming A and B already sorted */
1754       PetscInt imark = -1;
1755       if (v) {
1756         *v = v_p = mat->rowvalues;
1757         for (i=0; i<nzB; i++) {
1758           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1759           else break;
1760         }
1761         imark = i;
1762         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1763         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1764       }
1765       if (idx) {
1766         *idx = idx_p = mat->rowindices;
1767         if (imark > -1) {
1768           for (i=0; i<imark; i++) {
1769             idx_p[i] = cmap[cworkB[i]];
1770           }
1771         } else {
1772           for (i=0; i<nzB; i++) {
1773             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1774             else break;
1775           }
1776           imark = i;
1777         }
1778         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1779         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1780       }
1781     } else {
1782       if (idx) *idx = NULL;
1783       if (v)   *v   = NULL;
1784     }
1785   }
1786   *nz  = nztot;
1787   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1788   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1793 {
1794   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1795 
1796   PetscFunctionBegin;
1797   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1798   aij->getrowactive = PETSC_FALSE;
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1803 {
1804   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1805   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1806   PetscErrorCode ierr;
1807   PetscInt       i,j,cstart = mat->cmap->rstart;
1808   PetscReal      sum = 0.0;
1809   MatScalar      *v;
1810 
1811   PetscFunctionBegin;
1812   if (aij->size == 1) {
1813     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1814   } else {
1815     if (type == NORM_FROBENIUS) {
1816       v = amat->a;
1817       for (i=0; i<amat->nz; i++) {
1818         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1819       }
1820       v = bmat->a;
1821       for (i=0; i<bmat->nz; i++) {
1822         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1823       }
1824       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1825       *norm = PetscSqrtReal(*norm);
1826       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1827     } else if (type == NORM_1) { /* max column norm */
1828       PetscReal *tmp,*tmp2;
1829       PetscInt  *jj,*garray = aij->garray;
1830       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1831       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1832       *norm = 0.0;
1833       v     = amat->a; jj = amat->j;
1834       for (j=0; j<amat->nz; j++) {
1835         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1836       }
1837       v = bmat->a; jj = bmat->j;
1838       for (j=0; j<bmat->nz; j++) {
1839         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1840       }
1841       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1842       for (j=0; j<mat->cmap->N; j++) {
1843         if (tmp2[j] > *norm) *norm = tmp2[j];
1844       }
1845       ierr = PetscFree(tmp);CHKERRQ(ierr);
1846       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1847       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1848     } else if (type == NORM_INFINITY) { /* max row norm */
1849       PetscReal ntemp = 0.0;
1850       for (j=0; j<aij->A->rmap->n; j++) {
1851         v   = amat->a + amat->i[j];
1852         sum = 0.0;
1853         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1854           sum += PetscAbsScalar(*v); v++;
1855         }
1856         v = bmat->a + bmat->i[j];
1857         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v); v++;
1859         }
1860         if (sum > ntemp) ntemp = sum;
1861       }
1862       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1863       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1864     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1870 {
1871   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1872   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1873   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1874   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1875   PetscErrorCode  ierr;
1876   Mat             B,A_diag,*B_diag;
1877   const MatScalar *pbv,*bv;
1878 
1879   PetscFunctionBegin;
1880   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1881   ai = Aloc->i; aj = Aloc->j;
1882   bi = Bloc->i; bj = Bloc->j;
1883   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1884     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1885     PetscSFNode          *oloc;
1886     PETSC_UNUSED PetscSF sf;
1887 
1888     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1889     /* compute d_nnz for preallocation */
1890     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1891     for (i=0; i<ai[ma]; i++) {
1892       d_nnz[aj[i]]++;
1893     }
1894     /* compute local off-diagonal contributions */
1895     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1896     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1897     /* map those to global */
1898     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1899     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1900     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1901     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1902     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1903     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1904     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1905 
1906     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1907     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1908     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1909     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1910     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1911     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1912   } else {
1913     B    = *matout;
1914     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1915   }
1916 
1917   b           = (Mat_MPIAIJ*)B->data;
1918   A_diag      = a->A;
1919   B_diag      = &b->A;
1920   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1921   A_diag_ncol = A_diag->cmap->N;
1922   B_diag_ilen = sub_B_diag->ilen;
1923   B_diag_i    = sub_B_diag->i;
1924 
1925   /* Set ilen for diagonal of B */
1926   for (i=0; i<A_diag_ncol; i++) {
1927     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1928   }
1929 
1930   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
1931   very quickly (=without using MatSetValues), because all writes are local. */
1932   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1933 
1934   /* copy over the B part */
1935   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1936   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1937   pbv  = bv;
1938   row  = A->rmap->rstart;
1939   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1940   cols_tmp = cols;
1941   for (i=0; i<mb; i++) {
1942     ncol = bi[i+1]-bi[i];
1943     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1944     row++;
1945     pbv += ncol; cols_tmp += ncol;
1946   }
1947   ierr = PetscFree(cols);CHKERRQ(ierr);
1948   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1949 
1950   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1951   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1953     *matout = B;
1954   } else {
1955     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1961 {
1962   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1963   Mat            a    = aij->A,b = aij->B;
1964   PetscErrorCode ierr;
1965   PetscInt       s1,s2,s3;
1966 
1967   PetscFunctionBegin;
1968   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1969   if (rr) {
1970     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1971     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1972     /* Overlap communication with computation. */
1973     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1974   }
1975   if (ll) {
1976     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1977     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1978     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1979   }
1980   /* scale  the diagonal block */
1981   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1982 
1983   if (rr) {
1984     /* Do a scatter end and then right scale the off-diagonal block */
1985     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1986     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1992 {
1993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2002 {
2003   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2004   Mat            a,b,c,d;
2005   PetscBool      flg;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   a = matA->A; b = matA->B;
2010   c = matB->A; d = matB->B;
2011 
2012   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2013   if (flg) {
2014     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2015   }
2016   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2021 {
2022   PetscErrorCode ierr;
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2025 
2026   PetscFunctionBegin;
2027   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2028   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2029     /* because of the column compression in the off-processor part of the matrix a->B,
2030        the number of columns in a->B and b->B may be different, hence we cannot call
2031        the MatCopy() directly on the two parts. If need be, we can provide a more
2032        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2033        then copying the submatrices */
2034     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2035   } else {
2036     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2037     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2038   }
2039   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2044 {
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 /*
2053    Computes the number of nonzeros per row needed for preallocation when X and Y
2054    have different nonzero structure.
2055 */
2056 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2057 {
2058   PetscInt       i,j,k,nzx,nzy;
2059 
2060   PetscFunctionBegin;
2061   /* Set the number of nonzeros in the new matrix */
2062   for (i=0; i<m; i++) {
2063     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2064     nzx = xi[i+1] - xi[i];
2065     nzy = yi[i+1] - yi[i];
2066     nnz[i] = 0;
2067     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2068       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2069       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2070       nnz[i]++;
2071     }
2072     for (; k<nzy; k++) nnz[i]++;
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2078 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2079 {
2080   PetscErrorCode ierr;
2081   PetscInt       m = Y->rmap->N;
2082   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2083   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2084 
2085   PetscFunctionBegin;
2086   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2091 {
2092   PetscErrorCode ierr;
2093   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2094 
2095   PetscFunctionBegin;
2096   if (str == SAME_NONZERO_PATTERN) {
2097     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2098     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2099   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2100     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2101   } else {
2102     Mat      B;
2103     PetscInt *nnz_d,*nnz_o;
2104 
2105     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2106     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2107     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2108     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2109     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2110     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2111     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2112     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2113     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2114     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2115     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2116     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2117     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2123 
2124 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2125 {
2126 #if defined(PETSC_USE_COMPLEX)
2127   PetscErrorCode ierr;
2128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2129 
2130   PetscFunctionBegin;
2131   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2132   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2133 #else
2134   PetscFunctionBegin;
2135 #endif
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2140 {
2141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2146   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2153   PetscErrorCode ierr;
2154 
2155   PetscFunctionBegin;
2156   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2157   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2162 {
2163   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2164   PetscErrorCode    ierr;
2165   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2166   PetscScalar       *va,*vv;
2167   Vec               vB,vA;
2168   const PetscScalar *vb;
2169 
2170   PetscFunctionBegin;
2171   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2172   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2173 
2174   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2175   if (idx) {
2176     for (i=0; i<m; i++) {
2177       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2178     }
2179   }
2180 
2181   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2182   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2183   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2184 
2185   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2186   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2187   for (i=0; i<m; i++) {
2188     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2189       vv[i] = vb[i];
2190       if (idx) idx[i] = a->garray[idxb[i]];
2191     } else {
2192       vv[i] = va[i];
2193       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2194         idx[i] = a->garray[idxb[i]];
2195     }
2196   }
2197   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2198   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2199   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2200   ierr = PetscFree(idxb);CHKERRQ(ierr);
2201   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2202   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
/*
   MatGetRowMinAbs_MPIAIJ - For each local row of an MPIAIJ matrix, selects the entry of smallest absolute value.

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  v   - the selected value for each local row
-  idx - if not NULL, the global column index of the selected entry

   Notes:
   Three cases are handled.
   1. A->cmap->N == n (this process owns every column): the work is delegated to MatGetRowMinAbs()
      on the diagonal block, writing directly into the array of v.
   2. n == 0 (no local columns): every row gets the value 0.0 and idx = -1.
   3. Otherwise the diagonal block is handled by MatGetRowMinAbs() and the off-diagonal block is
      scanned here, taking implicit zeros (columns not stored in the row) into account; the two
      candidates are then merged per row. On a tie the diagonal value is stored and the smaller
      global column index is reported.
*/
2206 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2207 {
2208   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2209   PetscInt          m = A->rmap->n,n = A->cmap->n; /* local row and column counts */
2210   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* owned column range [cstart,cend) */
2211   PetscInt          *cmap  = mat->garray; /* off-diagonal block column -> global column */
2212   PetscInt          *diagIdx, *offdiagIdx;
2213   Vec               diagV, offdiagV;
2214   PetscScalar       *a, *diagA, *offdiagA;
2215   const PetscScalar *ba,*bav;
2216   PetscInt          r,j,col,ncols,*bi,*bj;
2217   PetscErrorCode    ierr;
2218   Mat               B = mat->B;
2219   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2220 
2221   PetscFunctionBegin;
2222   /* When a process holds entire A and other processes have no entry */
2223   if (A->cmap->N == n) {
2224     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2225     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); /* sequential view of v's storage */
2226     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2227     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2228     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2229     PetscFunctionReturn(0);
2230   } else if (n == 0) { /* no local columns */
2231     if (m) {
2232       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2233       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2234       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2235     }
2236     PetscFunctionReturn(0);
2237   }
2238 
2239   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2240   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2241   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2242   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); /* diagIdx[] are local to the diagonal block; cstart is added when merging */
2243 
2244   /* Get offdiagIdx[] for implicit 0.0 */
2245   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2246   ba   = bav; /* ba and bj are cursors: they are advanced entry by entry and point at row r's first entry at the top of each iteration */
2247   bi   = b->i;
2248   bj   = b->j;
2249   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2250   for (r = 0; r < m; r++) {
2251     ncols = bi[r+1] - bi[r];
2252     if (ncols == A->cmap->N - n) { /* Brow is dense */
2253       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2254     } else { /* Brow is sparse, so it has at least one implicit 0.0, which is the smallest possible absolute value */
2255       offdiagA[r] = 0.0;
2256 
2257       /* Find first hole in the cmap */
2258       for (j=0; j<ncols; j++) {
2259         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2260         if (col > j && j < cstart) {
2261           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2262           break;
2263         } else if (col > j + n && j >= cstart) { /* positions at or past cstart map to global columns shifted by the n owned columns */
2264           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2265           break;
2266         }
2267       }
2268       if (j == ncols && ncols < A->cmap->N - n) {
2269         /* a hole is outside compressed Bcols */
2270         if (ncols == 0) {
2271           if (cstart) {
2272             offdiagIdx[r] = 0;
2273           } else offdiagIdx[r] = cend;
2274         } else { /* ncols > 0 */
2275           offdiagIdx[r] = cmap[ncols-1] + 1;
2276           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the owned (diagonal block) columns */
2277         }
2278       }
2279     }
2280 
2281     for (j=0; j<ncols; j++) { /* scan the stored entries of the row; this also advances ba/bj to the next row */
2282       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2283       ba++; bj++;
2284     }
2285   }
2286 
2287   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2288   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2289   for (r = 0; r < m; ++r) { /* merge the diagonal and off-diagonal candidates */
2290     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2291       a[r]   = diagA[r];
2292       if (idx) idx[r] = cstart + diagIdx[r];
2293     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { /* tie: keep the diagonal value, report the smaller global column */
2294       a[r] = diagA[r];
2295       if (idx) {
2296         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2297           idx[r] = cstart + diagIdx[r];
2298         } else idx[r] = offdiagIdx[r];
2299       }
2300     } else {
2301       a[r]   = offdiagA[r];
2302       if (idx) idx[r] = offdiagIdx[r];
2303     }
2304   }
2305   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2306   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2307   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2308   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2309   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2310   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2311   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
/*
   MatGetRowMin_MPIAIJ - For each local row of an MPIAIJ matrix, selects the entry with the smallest real part.

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  v   - the selected value for each local row
-  idx - if not NULL, the global column index of the selected entry

   Notes:
   Three cases are handled.
   1. A->cmap->N == n (this process owns every column): the work is delegated to MatGetRowMin()
      on the diagonal block, writing directly into the array of v.
   2. n == 0 (no local columns): every row gets the value PETSC_MAX_REAL and idx = -1.
   3. Otherwise the diagonal block is handled by MatGetRowMin() and the off-diagonal block is
      scanned here, taking implicit zeros (columns not stored in the row) into account; the two
      candidates are then merged per row by comparing real parts. On a tie the diagonal value is
      stored and the smaller global column index is reported.
*/
2315 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2316 {
2317   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2318   PetscInt          m = A->rmap->n,n = A->cmap->n; /* local row and column counts */
2319   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* owned column range [cstart,cend) */
2320   PetscInt          *cmap  = mat->garray; /* off-diagonal block column -> global column */
2321   PetscInt          *diagIdx, *offdiagIdx;
2322   Vec               diagV, offdiagV;
2323   PetscScalar       *a, *diagA, *offdiagA;
2324   const PetscScalar *ba,*bav;
2325   PetscInt          r,j,col,ncols,*bi,*bj;
2326   PetscErrorCode    ierr;
2327   Mat               B = mat->B;
2328   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2329 
2330   PetscFunctionBegin;
2331   /* When a process holds entire A and other processes have no entry */
2332   if (A->cmap->N == n) {
2333     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2334     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); /* sequential view of v's storage */
2335     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2336     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2337     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2338     PetscFunctionReturn(0);
2339   } else if (n == 0) { /* no local columns */
2340     if (m) {
2341       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2342       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2343       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2344     }
2345     PetscFunctionReturn(0);
2346   }
2347 
2348   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2349   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2351   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); /* diagIdx[] are local to the diagonal block; cstart is added when merging */
2352 
2353   /* Get offdiagIdx[] for implicit 0.0 */
2354   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2355   ba   = bav; /* ba and bj are cursors: they are advanced entry by entry and point at row r's first entry at the top of each iteration */
2356   bi   = b->i;
2357   bj   = b->j;
2358   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2359   for (r = 0; r < m; r++) {
2360     ncols = bi[r+1] - bi[r];
2361     if (ncols == A->cmap->N - n) { /* Brow is dense */
2362       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2363     } else { /* Brow is sparse, so it has at least one implicit 0.0 and the row minimum is 0.0 or lower */
2364       offdiagA[r] = 0.0;
2365 
2366       /* Find first hole in the cmap */
2367       for (j=0; j<ncols; j++) {
2368         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2369         if (col > j && j < cstart) {
2370           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2371           break;
2372         } else if (col > j + n && j >= cstart) { /* positions at or past cstart map to global columns shifted by the n owned columns */
2373           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2374           break;
2375         }
2376       }
2377       if (j == ncols && ncols < A->cmap->N - n) {
2378         /* a hole is outside compressed Bcols */
2379         if (ncols == 0) {
2380           if (cstart) {
2381             offdiagIdx[r] = 0;
2382           } else offdiagIdx[r] = cend;
2383         } else { /* ncols > 0 */
2384           offdiagIdx[r] = cmap[ncols-1] + 1;
2385           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the owned (diagonal block) columns */
2386         }
2387       }
2388     }
2389 
2390     for (j=0; j<ncols; j++) { /* scan the stored entries of the row; this also advances ba/bj to the next row */
2391       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2392       ba++; bj++;
2393     }
2394   }
2395 
2396   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2397   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2398   for (r = 0; r < m; ++r) { /* merge the diagonal and off-diagonal candidates */
2399     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2400       a[r]   = diagA[r];
2401       if (idx) idx[r] = cstart + diagIdx[r];
2402     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { /* tie: keep the diagonal value, report the smaller global column */
2403       a[r] = diagA[r];
2404       if (idx) {
2405         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2406           idx[r] = cstart + diagIdx[r];
2407         } else idx[r] = offdiagIdx[r];
2408       }
2409     } else {
2410       a[r]   = offdiagA[r];
2411       if (idx) idx[r] = offdiagIdx[r];
2412     }
2413   }
2414   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2415   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2416   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2417   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2418   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2419   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2420   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
/*
   MatGetRowMax_MPIAIJ - For each local row of an MPIAIJ matrix, selects the entry with the largest real part.

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  v   - the selected value for each local row
-  idx - if not NULL, the global column index of the selected entry

   Notes:
   Three cases are handled.
   1. A->cmap->N == n (this process owns every column): the work is delegated to MatGetRowMax()
      on the diagonal block, writing directly into the array of v.
   2. n == 0 (no local columns): every row gets the value PETSC_MIN_REAL and idx = -1.
   3. Otherwise the diagonal block is handled by MatGetRowMax() and the off-diagonal block is
      scanned here, taking implicit zeros (columns not stored in the row) into account; the two
      candidates are then merged per row by comparing real parts. On a tie the diagonal value is
      stored and the smaller global column index is reported.
*/
2424 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2425 {
2426   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2427   PetscInt          m = A->rmap->n,n = A->cmap->n; /* local row and column counts */
2428   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* owned column range [cstart,cend) */
2429   PetscInt          *cmap  = mat->garray; /* off-diagonal block column -> global column */
2430   PetscInt          *diagIdx, *offdiagIdx;
2431   Vec               diagV, offdiagV;
2432   PetscScalar       *a, *diagA, *offdiagA;
2433   const PetscScalar *ba,*bav;
2434   PetscInt          r,j,col,ncols,*bi,*bj;
2435   PetscErrorCode    ierr;
2436   Mat               B = mat->B;
2437   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2438 
2439   PetscFunctionBegin;
2440   /* When a process holds entire A and other processes have no entry */
2441   if (A->cmap->N == n) {
2442     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2443     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); /* sequential view of v's storage */
2444     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2445     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2446     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2447     PetscFunctionReturn(0);
2448   } else if (n == 0) { /* no local columns */
2449     if (m) {
2450       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2451       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2452       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2453     }
2454     PetscFunctionReturn(0);
2455   }
2456 
2457   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2458   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2459   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2460   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); /* diagIdx[] are local to the diagonal block; cstart is added when merging */
2461 
2462   /* Get offdiagIdx[] for implicit 0.0 */
2463   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2464   ba   = bav; /* ba and bj are cursors: they are advanced entry by entry and point at row r's first entry at the top of each iteration */
2465   bi   = b->i;
2466   bj   = b->j;
2467   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2468   for (r = 0; r < m; r++) {
2469     ncols = bi[r+1] - bi[r];
2470     if (ncols == A->cmap->N - n) { /* Brow is dense */
2471       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2472     } else { /* Brow is sparse, so it has at least one implicit 0.0 and the row maximum is 0.0 or higher */
2473       offdiagA[r] = 0.0;
2474 
2475       /* Find first hole in the cmap */
2476       for (j=0; j<ncols; j++) {
2477         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2478         if (col > j && j < cstart) {
2479           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2480           break;
2481         } else if (col > j + n && j >= cstart) { /* positions at or past cstart map to global columns shifted by the n owned columns */
2482           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2483           break;
2484         }
2485       }
2486       if (j == ncols && ncols < A->cmap->N - n) {
2487         /* a hole is outside compressed Bcols */
2488         if (ncols == 0) {
2489           if (cstart) {
2490             offdiagIdx[r] = 0;
2491           } else offdiagIdx[r] = cend;
2492         } else { /* ncols > 0 */
2493           offdiagIdx[r] = cmap[ncols-1] + 1;
2494           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the owned (diagonal block) columns */
2495         }
2496       }
2497     }
2498 
2499     for (j=0; j<ncols; j++) { /* scan the stored entries of the row; this also advances ba/bj to the next row */
2500       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2501       ba++; bj++;
2502     }
2503   }
2504 
2505   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2506   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2507   for (r = 0; r < m; ++r) { /* merge the diagonal and off-diagonal candidates */
2508     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2509       a[r] = diagA[r];
2510       if (idx) idx[r] = cstart + diagIdx[r];
2511     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { /* tie: keep the diagonal value, report the smaller global column */
2512       a[r] = diagA[r];
2513       if (idx) {
2514         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2515           idx[r] = cstart + diagIdx[r];
2516         } else idx[r] = offdiagIdx[r];
2517       }
2518     } else {
2519       a[r] = offdiagA[r];
2520       if (idx) idx[r] = offdiagIdx[r];
2521     }
2522   }
2523   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2524   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2525   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2526   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2527   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2528   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2529   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2534 {
2535   PetscErrorCode ierr;
2536   Mat            *dummy;
2537 
2538   PetscFunctionBegin;
2539   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2540   *newmat = *dummy;
2541   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2546 {
2547   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2552   A->factorerrortype = a->A->factorerrortype;
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2557 {
2558   PetscErrorCode ierr;
2559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2560 
2561   PetscFunctionBegin;
2562   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2563   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2564   if (x->assembled) {
2565     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2566   } else {
2567     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2568   }
2569   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2570   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2575 {
2576   PetscFunctionBegin;
2577   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2578   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 /*@
2583    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2584 
2585    Collective on Mat
2586 
2587    Input Parameters:
2588 +    A - the matrix
2589 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2590 
2591  Level: advanced
2592 
2593 @*/
2594 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2595 {
2596   PetscErrorCode       ierr;
2597 
2598   PetscFunctionBegin;
2599   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2604 {
2605   PetscErrorCode       ierr;
2606   PetscBool            sc = PETSC_FALSE,flg;
2607 
2608   PetscFunctionBegin;
2609   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2610   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2611   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2612   if (flg) {
2613     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2614   }
2615   ierr = PetscOptionsTail();CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2620 {
2621   PetscErrorCode ierr;
2622   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2623   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2624 
2625   PetscFunctionBegin;
2626   if (!Y->preallocated) {
2627     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2628   } else if (!aij->nz) {
2629     PetscInt nonew = aij->nonew;
2630     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2631     aij->nonew = nonew;
2632   }
2633   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2638 {
2639   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2640   PetscErrorCode ierr;
2641 
2642   PetscFunctionBegin;
2643   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2644   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2645   if (d) {
2646     PetscInt rstart;
2647     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2648     *d += rstart;
2649 
2650   }
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2655 {
2656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - Table of the operations implemented for this matrix type.

   This is a positional initializer of struct _MatOps, so the order of the entries must match the
   order of the fields of that structure. A NULL entry means no MPIAIJ-specific routine is
   installed for that slot. The numbers in the inline comments are slot markers that help keep
   the positions aligned when entries are added or removed.
*/
2665 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2666                                        MatGetRow_MPIAIJ,
2667                                        MatRestoreRow_MPIAIJ,
2668                                        MatMult_MPIAIJ,
2669                                 /* 4*/ MatMultAdd_MPIAIJ,
2670                                        MatMultTranspose_MPIAIJ,
2671                                        MatMultTransposeAdd_MPIAIJ,
2672                                        NULL,
2673                                        NULL,
2674                                        NULL,
2675                                 /*10*/ NULL,
2676                                        NULL,
2677                                        NULL,
2678                                        MatSOR_MPIAIJ,
2679                                        MatTranspose_MPIAIJ,
2680                                 /*15*/ MatGetInfo_MPIAIJ,
2681                                        MatEqual_MPIAIJ,
2682                                        MatGetDiagonal_MPIAIJ,
2683                                        MatDiagonalScale_MPIAIJ,
2684                                        MatNorm_MPIAIJ,
2685                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2686                                        MatAssemblyEnd_MPIAIJ,
2687                                        MatSetOption_MPIAIJ,
2688                                        MatZeroEntries_MPIAIJ,
2689                                 /*24*/ MatZeroRows_MPIAIJ,
2690                                        NULL,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                 /*29*/ MatSetUp_MPIAIJ,
2695                                        NULL,
2696                                        NULL,
2697                                        MatGetDiagonalBlock_MPIAIJ,
2698                                        NULL,
2699                                 /*34*/ MatDuplicate_MPIAIJ,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                 /*39*/ MatAXPY_MPIAIJ,
2705                                        MatCreateSubMatrices_MPIAIJ,
2706                                        MatIncreaseOverlap_MPIAIJ,
2707                                        MatGetValues_MPIAIJ,
2708                                        MatCopy_MPIAIJ,
2709                                 /*44*/ MatGetRowMax_MPIAIJ,
2710                                        MatScale_MPIAIJ,
2711                                        MatShift_MPIAIJ,
2712                                        MatDiagonalSet_MPIAIJ,
2713                                        MatZeroRowsColumns_MPIAIJ,
2714                                 /*49*/ MatSetRandom_MPIAIJ,
2715                                        NULL,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2720                                        NULL,
2721                                        MatSetUnfactored_MPIAIJ,
2722                                        MatPermute_MPIAIJ,
2723                                        NULL,
2724                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2725                                        MatDestroy_MPIAIJ,
2726                                        MatView_MPIAIJ,
2727                                        NULL,
2728                                        NULL,
2729                                 /*64*/ NULL,
2730                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2735                                        MatGetRowMinAbs_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                 /*75*/ MatFDColoringApply_AIJ,
2741                                        MatSetFromOptions_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        MatFindZeroDiagonals_MPIAIJ,
2745                                 /*80*/ NULL,
2746                                        NULL,
2747                                        NULL,
2748                                 /*83*/ MatLoad_MPIAIJ,
2749                                        MatIsSymmetric_MPIAIJ,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        NULL,
2754                                 /*89*/ NULL,
2755                                        NULL,
2756                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2757                                        NULL,
2758                                        NULL,
2759                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        MatBindToCPU_MPIAIJ,
2764                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        MatConjugate_MPIAIJ,
2768                                        NULL,
2769                                 /*104*/MatSetValuesRow_MPIAIJ,
2770                                        MatRealPart_MPIAIJ,
2771                                        MatImaginaryPart_MPIAIJ,
2772                                        NULL,
2773                                        NULL,
2774                                 /*109*/NULL,
2775                                        NULL,
2776                                        MatGetRowMin_MPIAIJ,
2777                                        NULL,
2778                                        MatMissingDiagonal_MPIAIJ,
2779                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2780                                        NULL,
2781                                        MatGetGhosts_MPIAIJ,
2782                                        NULL,
2783                                        NULL,
2784                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        MatGetMultiProcBlock_MPIAIJ,
2789                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2790                                        MatGetColumnNorms_MPIAIJ,
2791                                        MatInvertBlockDiagonal_MPIAIJ,
2792                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2793                                        MatCreateSubMatricesMPI_MPIAIJ,
2794                                 /*129*/NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2798                                        NULL,
2799                                 /*134*/NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                 /*139*/MatSetBlockSizes_MPIAIJ,
2805                                        NULL,
2806                                        NULL,
2807                                        MatFDColoringSetUp_MPIXAIJ,
2808                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2809                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2810                                 /*145*/NULL,
2811                                        NULL,
2812                                        NULL
2813 };
2814 
2815 /* ----------------------------------------------------------------------------------------*/
2816 
2817 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2818 {
2819   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2820   PetscErrorCode ierr;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2824   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2829 {
2830   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2831   PetscErrorCode ierr;
2832 
2833   PetscFunctionBegin;
2834   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2835   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2840 {
2841   Mat_MPIAIJ     *b;
2842   PetscErrorCode ierr;
2843   PetscMPIInt    size;
2844 
2845   PetscFunctionBegin;
2846   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2847   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2848   b = (Mat_MPIAIJ*)B->data;
2849 
2850 #if defined(PETSC_USE_CTABLE)
2851   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2852 #else
2853   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2854 #endif
2855   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2856   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2857   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2858 
2859   /* Because the B will have been resized we simply destroy it and create a new one each time */
2860   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2861   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2862   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2863   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2864   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2865   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2866   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2867 
2868   if (!B->preallocated) {
2869     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2870     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2871     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2872     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2873     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2874   }
2875 
2876   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2877   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2878   B->preallocated  = PETSC_TRUE;
2879   B->was_assembled = PETSC_FALSE;
2880   B->assembled     = PETSC_FALSE;
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2885 {
2886   Mat_MPIAIJ     *b;
2887   PetscErrorCode ierr;
2888 
2889   PetscFunctionBegin;
2890   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2891   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2892   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2893   b = (Mat_MPIAIJ*)B->data;
2894 
2895 #if defined(PETSC_USE_CTABLE)
2896   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2897 #else
2898   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2899 #endif
2900   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2901   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2902   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2903 
2904   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2905   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2906   B->preallocated  = PETSC_TRUE;
2907   B->was_assembled = PETSC_FALSE;
2908   B->assembled = PETSC_FALSE;
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2913 {
2914   Mat            mat;
2915   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2916   PetscErrorCode ierr;
2917 
2918   PetscFunctionBegin;
2919   *newmat = NULL;
2920   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2921   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2922   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2923   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2924   a       = (Mat_MPIAIJ*)mat->data;
2925 
2926   mat->factortype   = matin->factortype;
2927   mat->assembled    = matin->assembled;
2928   mat->insertmode   = NOT_SET_VALUES;
2929   mat->preallocated = matin->preallocated;
2930 
2931   a->size         = oldmat->size;
2932   a->rank         = oldmat->rank;
2933   a->donotstash   = oldmat->donotstash;
2934   a->roworiented  = oldmat->roworiented;
2935   a->rowindices   = NULL;
2936   a->rowvalues    = NULL;
2937   a->getrowactive = PETSC_FALSE;
2938 
2939   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2940   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2941 
2942   if (oldmat->colmap) {
2943 #if defined(PETSC_USE_CTABLE)
2944     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2945 #else
2946     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2947     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2948     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2949 #endif
2950   } else a->colmap = NULL;
2951   if (oldmat->garray) {
2952     PetscInt len;
2953     len  = oldmat->B->cmap->n;
2954     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2955     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2956     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2957   } else a->garray = NULL;
2958 
2959   /* It may happen MatDuplicate is called with a non-assembled matrix
2960      In fact, MatDuplicate only requires the matrix to be preallocated
2961      This may happen inside a DMCreateMatrix_Shell */
2962   if (oldmat->lvec) {
2963     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2964     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2965   }
2966   if (oldmat->Mvctx) {
2967     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2968     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2969   }
2970   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2971   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2972   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2973   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2974   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2975   *newmat = mat;
2976   PetscFunctionReturn(0);
2977 }
2978 
2979 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2980 {
2981   PetscBool      isbinary, ishdf5;
2982   PetscErrorCode ierr;
2983 
2984   PetscFunctionBegin;
2985   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2986   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2987   /* force binary viewer to load .info file if it has not yet done so */
2988   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2989   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2990   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2991   if (isbinary) {
2992     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2993   } else if (ishdf5) {
2994 #if defined(PETSC_HAVE_HDF5)
2995     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2996 #else
2997     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2998 #endif
2999   } else {
3000     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3001   }
3002   PetscFunctionReturn(0);
3003 }
3004 
3005 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3006 {
3007   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3008   PetscInt       *rowidxs,*colidxs;
3009   PetscScalar    *matvals;
3010   PetscErrorCode ierr;
3011 
3012   PetscFunctionBegin;
3013   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3014 
3015   /* read in matrix header */
3016   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3017   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3018   M  = header[1]; N = header[2]; nz = header[3];
3019   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3020   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3021   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3022 
3023   /* set block sizes from the viewer's .info file */
3024   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3025   /* set global sizes if not set already */
3026   if (mat->rmap->N < 0) mat->rmap->N = M;
3027   if (mat->cmap->N < 0) mat->cmap->N = N;
3028   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3029   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3030 
3031   /* check if the matrix sizes are correct */
3032   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3033   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3034 
3035   /* read in row lengths and build row indices */
3036   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3037   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3038   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3039   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3040   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3041   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3042   /* read in column indices and matrix values */
3043   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3044   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3045   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3046   /* store matrix indices and values */
3047   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3048   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3049   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 /* Not scalable because of ISAllGather() unless getting all columns. */
3054 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3055 {
3056   PetscErrorCode ierr;
3057   IS             iscol_local;
3058   PetscBool      isstride;
3059   PetscMPIInt    lisstride=0,gisstride;
3060 
3061   PetscFunctionBegin;
3062   /* check if we are grabbing all columns*/
3063   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3064 
3065   if (isstride) {
3066     PetscInt  start,len,mstart,mlen;
3067     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3068     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3069     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3070     if (mstart == start && mlen-mstart == len) lisstride = 1;
3071   }
3072 
3073   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3074   if (gisstride) {
3075     PetscInt N;
3076     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3077     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3078     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3079     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3080   } else {
3081     PetscInt cbs;
3082     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3083     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3084     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3085   }
3086 
3087   *isseq = iscol_local;
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 /*
3092  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3093  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3094 
3095  Input Parameters:
3096    mat - matrix
3097    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3098            i.e., mat->rstart <= isrow[i] < mat->rend
3099    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3100            i.e., mat->cstart <= iscol[i] < mat->cend
3101  Output Parameter:
3102    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3103    iscol_o - sequential column index set for retrieving mat->B
3104    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3105  */
3106 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3107 {
3108   PetscErrorCode ierr;
3109   Vec            x,cmap;
3110   const PetscInt *is_idx;
3111   PetscScalar    *xarray,*cmaparray;
3112   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3113   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3114   Mat            B=a->B;
3115   Vec            lvec=a->lvec,lcmap;
3116   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3117   MPI_Comm       comm;
3118   VecScatter     Mvctx=a->Mvctx;
3119 
3120   PetscFunctionBegin;
3121   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3122   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3123 
3124   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3125   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3126   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3127   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3128   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3129 
3130   /* Get start indices */
3131   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3132   isstart -= ncols;
3133   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3134 
3135   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3136   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3137   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3138   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3139   for (i=0; i<ncols; i++) {
3140     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3141     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3142     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3143   }
3144   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3145   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3146   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3147 
3148   /* Get iscol_d */
3149   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3150   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3151   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3152 
3153   /* Get isrow_d */
3154   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3155   rstart = mat->rmap->rstart;
3156   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3157   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3158   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3159   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3160 
3161   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3162   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3163   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3164 
3165   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3166   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3167   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3168 
3169   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3170 
3171   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3172   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3173 
3174   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3175   /* off-process column indices */
3176   count = 0;
3177   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3178   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3179 
3180   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3181   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3182   for (i=0; i<Bn; i++) {
3183     if (PetscRealPart(xarray[i]) > -1.0) {
3184       idx[count]     = i;                   /* local column index in off-diagonal part B */
3185       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3186       count++;
3187     }
3188   }
3189   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3190   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3191 
3192   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3193   /* cannot ensure iscol_o has same blocksize as iscol! */
3194 
3195   ierr = PetscFree(idx);CHKERRQ(ierr);
3196   *garray = cmap1;
3197 
3198   ierr = VecDestroy(&x);CHKERRQ(ierr);
3199   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3200   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3205 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3206 {
3207   PetscErrorCode ierr;
3208   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3209   Mat            M = NULL;
3210   MPI_Comm       comm;
3211   IS             iscol_d,isrow_d,iscol_o;
3212   Mat            Asub = NULL,Bsub = NULL;
3213   PetscInt       n;
3214 
3215   PetscFunctionBegin;
3216   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3217 
3218   if (call == MAT_REUSE_MATRIX) {
3219     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3220     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3221     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3222 
3223     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3224     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3225 
3226     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3227     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3228 
3229     /* Update diagonal and off-diagonal portions of submat */
3230     asub = (Mat_MPIAIJ*)(*submat)->data;
3231     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3232     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3233     if (n) {
3234       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3235     }
3236     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3237     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238 
3239   } else { /* call == MAT_INITIAL_MATRIX) */
3240     const PetscInt *garray;
3241     PetscInt        BsubN;
3242 
3243     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3244     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3245 
3246     /* Create local submatrices Asub and Bsub */
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3249 
3250     /* Create submatrix M */
3251     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3252 
3253     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3254     asub = (Mat_MPIAIJ*)M->data;
3255 
3256     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3257     n = asub->B->cmap->N;
3258     if (BsubN > n) {
3259       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3260       const PetscInt *idx;
3261       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3262       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3263 
3264       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3265       j = 0;
3266       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3267       for (i=0; i<n; i++) {
3268         if (j >= BsubN) break;
3269         while (subgarray[i] > garray[j]) j++;
3270 
3271         if (subgarray[i] == garray[j]) {
3272           idx_new[i] = idx[j++];
3273         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3274       }
3275       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3276 
3277       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3278       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3279 
3280     } else if (BsubN < n) {
3281       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3282     }
3283 
3284     ierr = PetscFree(garray);CHKERRQ(ierr);
3285     *submat = M;
3286 
3287     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3288     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3289     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3290 
3291     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3292     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3293 
3294     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3295     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3296   }
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3301 {
3302   PetscErrorCode ierr;
3303   IS             iscol_local=NULL,isrow_d;
3304   PetscInt       csize;
3305   PetscInt       n,i,j,start,end;
3306   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3307   MPI_Comm       comm;
3308 
3309   PetscFunctionBegin;
3310   /* If isrow has same processor distribution as mat,
3311      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3312   if (call == MAT_REUSE_MATRIX) {
3313     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3314     if (isrow_d) {
3315       sameRowDist  = PETSC_TRUE;
3316       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3317     } else {
3318       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3319       if (iscol_local) {
3320         sameRowDist  = PETSC_TRUE;
3321         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3322       }
3323     }
3324   } else {
3325     /* Check if isrow has same processor distribution as mat */
3326     sameDist[0] = PETSC_FALSE;
3327     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3328     if (!n) {
3329       sameDist[0] = PETSC_TRUE;
3330     } else {
3331       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3332       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3333       if (i >= start && j < end) {
3334         sameDist[0] = PETSC_TRUE;
3335       }
3336     }
3337 
3338     /* Check if iscol has same processor distribution as mat */
3339     sameDist[1] = PETSC_FALSE;
3340     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3341     if (!n) {
3342       sameDist[1] = PETSC_TRUE;
3343     } else {
3344       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3345       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3346       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3347     }
3348 
3349     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3350     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3351     sameRowDist = tsameDist[0];
3352   }
3353 
3354   if (sameRowDist) {
3355     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3356       /* isrow and iscol have same processor distribution as mat */
3357       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3358       PetscFunctionReturn(0);
3359     } else { /* sameRowDist */
3360       /* isrow has same processor distribution as mat */
3361       if (call == MAT_INITIAL_MATRIX) {
3362         PetscBool sorted;
3363         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3364         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3365         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3366         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3367 
3368         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3369         if (sorted) {
3370           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3371           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3372           PetscFunctionReturn(0);
3373         }
3374       } else { /* call == MAT_REUSE_MATRIX */
3375         IS iscol_sub;
3376         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3377         if (iscol_sub) {
3378           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3379           PetscFunctionReturn(0);
3380         }
3381       }
3382     }
3383   }
3384 
3385   /* General case: iscol -> iscol_local which has global size of iscol */
3386   if (call == MAT_REUSE_MATRIX) {
3387     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3388     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3389   } else {
3390     if (!iscol_local) {
3391       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3392     }
3393   }
3394 
3395   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3396   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3397 
3398   if (call == MAT_INITIAL_MATRIX) {
3399     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3400     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3401   }
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 /*@C
3406      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3407          and "off-diagonal" part of the matrix in CSR format.
3408 
3409    Collective
3410 
3411    Input Parameters:
3412 +  comm - MPI communicator
3413 .  A - "diagonal" portion of matrix
3414 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3415 -  garray - global index of B columns
3416 
3417    Output Parameter:
3418 .   mat - the matrix, with input A as its local diagonal matrix
3419    Level: advanced
3420 
3421    Notes:
3422        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3423        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3424 
3425 .seealso: MatCreateMPIAIJWithSplitArrays()
3426 @*/
3427 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3428 {
3429   PetscErrorCode    ierr;
3430   Mat_MPIAIJ        *maij;
3431   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3432   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3433   const PetscScalar *oa;
3434   Mat               Bnew;
3435   PetscInt          m,n,N;
3436 
3437   PetscFunctionBegin;
3438   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3439   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3440   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3441   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3442   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3443   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3444 
3445   /* Get global columns of mat */
3446   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3447 
3448   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3449   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3450   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3451   maij = (Mat_MPIAIJ*)(*mat)->data;
3452 
3453   (*mat)->preallocated = PETSC_TRUE;
3454 
3455   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3456   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3457 
3458   /* Set A as diagonal portion of *mat */
3459   maij->A = A;
3460 
3461   nz = oi[m];
3462   for (i=0; i<nz; i++) {
3463     col   = oj[i];
3464     oj[i] = garray[col];
3465   }
3466 
3467   /* Set Bnew as off-diagonal portion of *mat */
3468   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3469   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3470   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3471   bnew        = (Mat_SeqAIJ*)Bnew->data;
3472   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3473   maij->B     = Bnew;
3474 
3475   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3476 
3477   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3478   b->free_a       = PETSC_FALSE;
3479   b->free_ij      = PETSC_FALSE;
3480   ierr = MatDestroy(&B);CHKERRQ(ierr);
3481 
3482   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3483   bnew->free_a       = PETSC_TRUE;
3484   bnew->free_ij      = PETSC_TRUE;
3485 
3486   /* condense columns of maij->B */
3487   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3488   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3490   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3491   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3496 
3497 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3498 {
3499   PetscErrorCode ierr;
3500   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3501   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3502   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3503   Mat            M,Msub,B=a->B;
3504   MatScalar      *aa;
3505   Mat_SeqAIJ     *aij;
3506   PetscInt       *garray = a->garray,*colsub,Ncols;
3507   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3508   IS             iscol_sub,iscmap;
3509   const PetscInt *is_idx,*cmap;
3510   PetscBool      allcolumns=PETSC_FALSE;
3511   MPI_Comm       comm;
3512 
3513   PetscFunctionBegin;
3514   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3515   if (call == MAT_REUSE_MATRIX) {
3516     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3517     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3518     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3519 
3520     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3521     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3522 
3523     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3524     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3525 
3526     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3527 
3528   } else { /* call == MAT_INITIAL_MATRIX) */
3529     PetscBool flg;
3530 
3531     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3532     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3533 
3534     /* (1) iscol -> nonscalable iscol_local */
3535     /* Check for special case: each processor gets entire matrix columns */
3536     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3537     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3538     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3539     if (allcolumns) {
3540       iscol_sub = iscol_local;
3541       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3542       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3543 
3544     } else {
3545       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3546       PetscInt *idx,*cmap1,k;
3547       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3548       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3549       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3550       count = 0;
3551       k     = 0;
3552       for (i=0; i<Ncols; i++) {
3553         j = is_idx[i];
3554         if (j >= cstart && j < cend) {
3555           /* diagonal part of mat */
3556           idx[count]     = j;
3557           cmap1[count++] = i; /* column index in submat */
3558         } else if (Bn) {
3559           /* off-diagonal part of mat */
3560           if (j == garray[k]) {
3561             idx[count]     = j;
3562             cmap1[count++] = i;  /* column index in submat */
3563           } else if (j > garray[k]) {
3564             while (j > garray[k] && k < Bn-1) k++;
3565             if (j == garray[k]) {
3566               idx[count]     = j;
3567               cmap1[count++] = i; /* column index in submat */
3568             }
3569           }
3570         }
3571       }
3572       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3573 
3574       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3575       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3576       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3577 
3578       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3579     }
3580 
3581     /* (3) Create sequential Msub */
3582     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3583   }
3584 
3585   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3586   aij  = (Mat_SeqAIJ*)(Msub)->data;
3587   ii   = aij->i;
3588   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3589 
3590   /*
3591       m - number of local rows
3592       Ncols - number of columns (same on all processors)
3593       rstart - first row in new global matrix generated
3594   */
3595   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3596 
3597   if (call == MAT_INITIAL_MATRIX) {
3598     /* (4) Create parallel newmat */
3599     PetscMPIInt    rank,size;
3600     PetscInt       csize;
3601 
3602     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3603     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3604 
3605     /*
3606         Determine the number of non-zeros in the diagonal and off-diagonal
3607         portions of the matrix in order to do correct preallocation
3608     */
3609 
3610     /* first get start and end of "diagonal" columns */
3611     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3612     if (csize == PETSC_DECIDE) {
3613       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3614       if (mglobal == Ncols) { /* square matrix */
3615         nlocal = m;
3616       } else {
3617         nlocal = Ncols/size + ((Ncols % size) > rank);
3618       }
3619     } else {
3620       nlocal = csize;
3621     }
3622     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3623     rstart = rend - nlocal;
3624     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3625 
3626     /* next, compute all the lengths */
3627     jj    = aij->j;
3628     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3629     olens = dlens + m;
3630     for (i=0; i<m; i++) {
3631       jend = ii[i+1] - ii[i];
3632       olen = 0;
3633       dlen = 0;
3634       for (j=0; j<jend; j++) {
3635         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3636         else dlen++;
3637         jj++;
3638       }
3639       olens[i] = olen;
3640       dlens[i] = dlen;
3641     }
3642 
3643     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3644     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3645 
3646     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3647     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3648     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3649     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3650     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3651     ierr = PetscFree(dlens);CHKERRQ(ierr);
3652 
3653   } else { /* call == MAT_REUSE_MATRIX */
3654     M    = *newmat;
3655     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3656     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3657     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3658     /*
3659          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3660        rather than the slower MatSetValues().
3661     */
3662     M->was_assembled = PETSC_TRUE;
3663     M->assembled     = PETSC_FALSE;
3664   }
3665 
3666   /* (5) Set values of Msub to *newmat */
3667   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3668   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3669 
3670   jj   = aij->j;
3671   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3672   for (i=0; i<m; i++) {
3673     row = rstart + i;
3674     nz  = ii[i+1] - ii[i];
3675     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3676     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3677     jj += nz; aa += nz;
3678   }
3679   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3680   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3681 
3682   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3683   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684 
3685   ierr = PetscFree(colsub);CHKERRQ(ierr);
3686 
3687   /* save Msub, iscol_sub and iscmap used in processor for next request */
3688   if (call == MAT_INITIAL_MATRIX) {
3689     *newmat = M;
3690     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3691     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3692 
3693     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3694     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3695 
3696     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3697     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3698 
3699     if (iscol_local) {
3700       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3701       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3702     }
3703   }
3704   PetscFunctionReturn(0);
3705 }
3706 
3707 /*
3708     Not great since it makes two copies of the submatrix, first an SeqAIJ
3709   in local and then by concatenating the local matrices the end result.
3710   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3711 
3712   Note: This requires a sequential iscol with all indices.
3713 */
3714 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3715 {
3716   PetscErrorCode ierr;
3717   PetscMPIInt    rank,size;
3718   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3719   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3720   Mat            M,Mreuse;
3721   MatScalar      *aa,*vwork;
3722   MPI_Comm       comm;
3723   Mat_SeqAIJ     *aij;
3724   PetscBool      colflag,allcolumns=PETSC_FALSE;
3725 
3726   PetscFunctionBegin;
3727   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3728   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3729   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3730 
3731   /* Check for special case: each processor gets entire matrix columns */
3732   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3733   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3734   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3735   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3736 
3737   if (call ==  MAT_REUSE_MATRIX) {
3738     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3739     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3740     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3741   } else {
3742     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3743   }
3744 
3745   /*
3746       m - number of local rows
3747       n - number of columns (same on all processors)
3748       rstart - first row in new global matrix generated
3749   */
3750   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3751   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3752   if (call == MAT_INITIAL_MATRIX) {
3753     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3754     ii  = aij->i;
3755     jj  = aij->j;
3756 
3757     /*
3758         Determine the number of non-zeros in the diagonal and off-diagonal
3759         portions of the matrix in order to do correct preallocation
3760     */
3761 
3762     /* first get start and end of "diagonal" columns */
3763     if (csize == PETSC_DECIDE) {
3764       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3765       if (mglobal == n) { /* square matrix */
3766         nlocal = m;
3767       } else {
3768         nlocal = n/size + ((n % size) > rank);
3769       }
3770     } else {
3771       nlocal = csize;
3772     }
3773     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3774     rstart = rend - nlocal;
3775     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3776 
3777     /* next, compute all the lengths */
3778     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3779     olens = dlens + m;
3780     for (i=0; i<m; i++) {
3781       jend = ii[i+1] - ii[i];
3782       olen = 0;
3783       dlen = 0;
3784       for (j=0; j<jend; j++) {
3785         if (*jj < rstart || *jj >= rend) olen++;
3786         else dlen++;
3787         jj++;
3788       }
3789       olens[i] = olen;
3790       dlens[i] = dlen;
3791     }
3792     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3793     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3794     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3795     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3796     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3797     ierr = PetscFree(dlens);CHKERRQ(ierr);
3798   } else {
3799     PetscInt ml,nl;
3800 
3801     M    = *newmat;
3802     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3803     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3804     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3805     /*
3806          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3807        rather than the slower MatSetValues().
3808     */
3809     M->was_assembled = PETSC_TRUE;
3810     M->assembled     = PETSC_FALSE;
3811   }
3812   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3813   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3814   ii   = aij->i;
3815   jj   = aij->j;
3816 
3817   /* trigger copy to CPU if needed */
3818   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3819   for (i=0; i<m; i++) {
3820     row   = rstart + i;
3821     nz    = ii[i+1] - ii[i];
3822     cwork = jj; jj += nz;
3823     vwork = aa; aa += nz;
3824     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3825   }
3826   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3827 
3828   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   *newmat = M;
3831 
3832   /* save submatrix used in processor for next request */
3833   if (call ==  MAT_INITIAL_MATRIX) {
3834     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3835     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3836   }
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3841 {
3842   PetscInt       m,cstart, cend,j,nnz,i,d;
3843   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3844   const PetscInt *JJ;
3845   PetscErrorCode ierr;
3846   PetscBool      nooffprocentries;
3847 
3848   PetscFunctionBegin;
3849   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3850 
3851   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3852   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3853   m      = B->rmap->n;
3854   cstart = B->cmap->rstart;
3855   cend   = B->cmap->rend;
3856   rstart = B->rmap->rstart;
3857 
3858   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3859 
3860   if (PetscDefined(USE_DEBUG)) {
3861     for (i=0; i<m; i++) {
3862       nnz = Ii[i+1]- Ii[i];
3863       JJ  = J + Ii[i];
3864       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3865       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3866       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3867     }
3868   }
3869 
3870   for (i=0; i<m; i++) {
3871     nnz     = Ii[i+1]- Ii[i];
3872     JJ      = J + Ii[i];
3873     nnz_max = PetscMax(nnz_max,nnz);
3874     d       = 0;
3875     for (j=0; j<nnz; j++) {
3876       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3877     }
3878     d_nnz[i] = d;
3879     o_nnz[i] = nnz - d;
3880   }
3881   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3882   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3883 
3884   for (i=0; i<m; i++) {
3885     ii   = i + rstart;
3886     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3887   }
3888   nooffprocentries    = B->nooffprocentries;
3889   B->nooffprocentries = PETSC_TRUE;
3890   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   B->nooffprocentries = nooffprocentries;
3893 
3894   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 /*@
3899    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3900    (the default parallel PETSc format).
3901 
3902    Collective
3903 
3904    Input Parameters:
3905 +  B - the matrix
3906 .  i - the indices into j for the start of each local row (starts with zero)
3907 .  j - the column indices for each local row (starts with zero)
3908 -  v - optional values in the matrix
3909 
3910    Level: developer
3911 
3912    Notes:
3913        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3914      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3915      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3916 
3917        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3918 
3919        The format which is used for the sparse matrix input, is equivalent to a
3920     row-major ordering, i.e., for the following matrix, the input data expected is
3921     as shown
3922 
3923 $        1 0 0
3924 $        2 0 3     P0
3925 $       -------
3926 $        4 5 6     P1
3927 $
3928 $     Process0 [P0]: rows_owned=[0,1]
3929 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3930 $        j =  {0,0,2}  [size = 3]
3931 $        v =  {1,2,3}  [size = 3]
3932 $
3933 $     Process1 [P1]: rows_owned=[2]
3934 $        i =  {0,3}    [size = nrow+1  = 1+1]
3935 $        j =  {0,1,2}  [size = 3]
3936 $        v =  {4,5,6}  [size = 3]
3937 
3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3939           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3940 @*/
3941 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3942 {
3943   PetscErrorCode ierr;
3944 
3945   PetscFunctionBegin;
3946   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3947   PetscFunctionReturn(0);
3948 }
3949 
3950 /*@C
3951    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3952    (the default parallel PETSc format).  For good matrix assembly performance
3953    the user should preallocate the matrix storage by setting the parameters
3954    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3955    performance can be increased by more than a factor of 50.
3956 
3957    Collective
3958 
3959    Input Parameters:
3960 +  B - the matrix
3961 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3962            (same value is used for all local rows)
3963 .  d_nnz - array containing the number of nonzeros in the various rows of the
3964            DIAGONAL portion of the local submatrix (possibly different for each row)
3965            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3966            The size of this array is equal to the number of local rows, i.e 'm'.
3967            For matrices that will be factored, you must leave room for (and set)
3968            the diagonal entry even if it is zero.
3969 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3970            submatrix (same value is used for all local rows).
3971 -  o_nnz - array containing the number of nonzeros in the various rows of the
3972            OFF-DIAGONAL portion of the local submatrix (possibly different for
3973            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3974            structure. The size of this array is equal to the number
3975            of local rows, i.e 'm'.
3976 
3977    If the *_nnz parameter is given then the *_nz parameter is ignored
3978 
3979    The AIJ format (also called the Yale sparse matrix format or
3980    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3981    storage.  The stored row and column indices begin with zero.
3982    See Users-Manual: ch_mat for details.
3983 
3984    The parallel matrix is partitioned such that the first m0 rows belong to
3985    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3986    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3987 
3988    The DIAGONAL portion of the local submatrix of a processor can be defined
3989    as the submatrix which is obtained by extracting the part corresponding to
3990    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3991    first row that belongs to the processor, r2 is the last row belonging to
3992    this processor, and c1-c2 is range of indices of the local part of a
3993    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3994    common case of a square matrix, the row and column ranges are the same and
3995    the DIAGONAL part is also square. The remaining portion of the local
3996    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3997 
3998    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3999 
4000    You can call MatGetInfo() to get information on how effective the preallocation was;
4001    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4002    You can also run with the option -info and look for messages with the string
4003    malloc in them to see if additional memory allocation was needed.
4004 
4005    Example usage:
4006 
4007    Consider the following 8x8 matrix with 34 non-zero values, that is
4008    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4009    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4010    as follows:
4011 
4012 .vb
4013             1  2  0  |  0  3  0  |  0  4
4014     Proc0   0  5  6  |  7  0  0  |  8  0
4015             9  0 10  | 11  0  0  | 12  0
4016     -------------------------------------
4017            13  0 14  | 15 16 17  |  0  0
4018     Proc1   0 18  0  | 19 20 21  |  0  0
4019             0  0  0  | 22 23  0  | 24  0
4020     -------------------------------------
4021     Proc2  25 26 27  |  0  0 28  | 29  0
4022            30  0  0  | 31 32 33  |  0 34
4023 .ve
4024 
4025    This can be represented as a collection of submatrices as:
4026 
4027 .vb
4028       A B C
4029       D E F
4030       G H I
4031 .ve
4032 
4033    Where the submatrices A,B,C are owned by proc0, D,E,F are
4034    owned by proc1, G,H,I are owned by proc2.
4035 
4036    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4037    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4038    The 'M','N' parameters are 8,8, and have the same values on all procs.
4039 
4040    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4041    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4042    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4043    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4044    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4045    matrix, and [DF] as another SeqAIJ matrix.
4046 
4047    When d_nz, o_nz parameters are specified, d_nz storage elements are
4048    allocated for every row of the local diagonal submatrix, and o_nz
4049    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4050    One way to choose d_nz and o_nz is to use the max nonzeros per local
4051    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4052    In this case, the values of d_nz,o_nz are:
4053 .vb
4054      proc0 : d_nz = 2, o_nz = 2
4055      proc1 : d_nz = 3, o_nz = 2
4056      proc2 : d_nz = 1, o_nz = 4
4057 .ve
4058    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4059    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4060    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4061    34 values.
4062 
4063    When d_nnz, o_nnz parameters are specified, the storage is specified
4064    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4065    In the above case the values for d_nnz,o_nnz are:
4066 .vb
4067      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4068      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4069      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4070 .ve
4071    Here the space allocated is sum of all the above values i.e 34, and
4072    hence pre-allocation is perfect.
4073 
4074    Level: intermediate
4075 
4076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4077           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4078 @*/
4079 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4080 {
4081   PetscErrorCode ierr;
4082 
4083   PetscFunctionBegin;
4084   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4085   PetscValidType(B,1);
4086   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4087   PetscFunctionReturn(0);
4088 }
4089 
4090 /*@
4091      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix from arrays that describe the local rows in standard
4092          CSR format.
4093 
4094    Collective
4095 
4096    Input Parameters:
4097 +  comm - MPI communicator
4098 .  m - number of local rows (Cannot be PETSC_DECIDE)
4099 .  n - This value should be the same as the local size used in creating the
4100        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4101        calculated if N is given) For square matrices n is almost always m.
4102 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4103 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4104 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4105 .   j - column indices
4106 -   a - matrix values
4107 
4108    Output Parameter:
4109 .   mat - the matrix
4110 
4111    Level: intermediate
4112 
4113    Notes:
4114        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4115      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4116      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4117 
4118        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4119 
4120        The format which is used for the sparse matrix input, is equivalent to a
4121     row-major ordering.. i.e for the following matrix, the input data expected is
4122     as shown
4123 
4124        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4125 
4126 $        1 0 0
4127 $        2 0 3     P0
4128 $       -------
4129 $        4 5 6     P1
4130 $
4131 $     Process0 [P0]: rows_owned=[0,1]
4132 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4133 $        j =  {0,0,2}  [size = 3]
4134 $        v =  {1,2,3}  [size = 3]
4135 $
4136 $     Process1 [P1]: rows_owned=[2]
4137 $        i =  {0,3}    [size = nrow+1  = 1+1]
4138 $        j =  {0,1,2}  [size = 3]
4139 $        v =  {4,5,6}  [size = 3]
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4143 @*/
4144 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4145 {
4146   PetscErrorCode ierr;
4147 
4148   PetscFunctionBegin;
4149   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4150   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4151   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4152   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4153   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4154   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4155   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
4159 /*@
4160      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix from arrays that describe the local rows in standard
4161          CSR format. Only the numerical values are updated; the other arrays must be identical
4162 
4163    Collective
4164 
4165    Input Parameters:
4166 +  mat - the matrix
4167 .  m - number of local rows (Cannot be PETSC_DECIDE)
4168 .  n - This value should be the same as the local size used in creating the
4169        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4170        calculated if N is given) For square matrices n is almost always m.
4171 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4172 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4173 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4174 .  J - column indices
4175 -  v - matrix values
4176 
4177    Level: intermediate
4178 
4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4180           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatCreateMPIAIJWithArrays()
4181 @*/
4182 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4183 {
4184   PetscErrorCode ierr;
4185   PetscInt       cstart,nnz,i,j;
4186   PetscInt       *ld;
4187   PetscBool      nooffprocentries;
4188   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4189   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4190   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4191   const PetscInt *Adi = Ad->i;
4192   PetscInt       ldi,Iii,md;
4193 
4194   PetscFunctionBegin;
4195   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4196   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4197   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4198   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4199 
4200   cstart = mat->cmap->rstart;
4201   if (!Aij->ld) {
4202     /* count number of entries below block diagonal */
4203     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4204     Aij->ld = ld;
4205     for (i=0; i<m; i++) {
4206       nnz  = Ii[i+1]- Ii[i];
4207       j     = 0;
4208       while  (J[j] < cstart && j < nnz) {j++;}
4209       J    += nnz;
4210       ld[i] = j;
4211     }
4212   } else {
4213     ld = Aij->ld;
4214   }
4215 
4216   for (i=0; i<m; i++) {
4217     nnz  = Ii[i+1]- Ii[i];
4218     Iii  = Ii[i];
4219     ldi  = ld[i];
4220     md   = Adi[i+1]-Adi[i];
4221     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4222     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4223     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4224     ad  += md;
4225     ao  += nnz - md;
4226   }
4227   nooffprocentries      = mat->nooffprocentries;
4228   mat->nooffprocentries = PETSC_TRUE;
4229   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4230   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4231   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4234   mat->nooffprocentries = nooffprocentries;
4235   PetscFunctionReturn(0);
4236 }
4237 
4238 /*@C
4239    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4240    (the default parallel PETSc format).  For good matrix assembly performance
4241    the user should preallocate the matrix storage by setting the parameters
4242    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4243    performance can be increased by more than a factor of 50.
4244 
4245    Collective
4246 
4247    Input Parameters:
4248 +  comm - MPI communicator
4249 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4250            This value should be the same as the local size used in creating the
4251            y vector for the matrix-vector product y = Ax.
4252 .  n - This value should be the same as the local size used in creating the
4253        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4254        calculated if N is given) For square matrices n is almost always m.
4255 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4256 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4257 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4258            (same value is used for all local rows)
4259 .  d_nnz - array containing the number of nonzeros in the various rows of the
4260            DIAGONAL portion of the local submatrix (possibly different for each row)
4261            or NULL, if d_nz is used to specify the nonzero structure.
4262            The size of this array is equal to the number of local rows, i.e 'm'.
4263 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4264            submatrix (same value is used for all local rows).
4265 -  o_nnz - array containing the number of nonzeros in the various rows of the
4266            OFF-DIAGONAL portion of the local submatrix (possibly different for
4267            each row) or NULL, if o_nz is used to specify the nonzero
4268            structure. The size of this array is equal to the number
4269            of local rows, i.e 'm'.
4270 
4271    Output Parameter:
4272 .  A - the matrix
4273 
4274    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4275    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4276    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4277 
4278    Notes:
4279    If the *_nnz parameter is given then the *_nz parameter is ignored
4280 
4281    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4282    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4283    storage requirements for this matrix.
4284 
4285    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4286    processor then it must be used on all processors that share the object for
4287    that argument.
4288 
4289    The user MUST specify either the local or global matrix dimensions
4290    (possibly both).
4291 
4292    The parallel matrix is partitioned across processors such that the
4293    first m0 rows belong to process 0, the next m1 rows belong to
4294    process 1, the next m2 rows belong to process 2 etc.. where
4295    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4296    values corresponding to [m x N] submatrix.
4297 
4298    The columns are logically partitioned with the n0 columns belonging
4299    to 0th partition, the next n1 columns belonging to the next
4300    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4301 
4302    The DIAGONAL portion of the local submatrix on any given processor
4303    is the submatrix corresponding to the rows and columns m,n
4304    corresponding to the given processor. i.e diagonal matrix on
4305    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4306    etc. The remaining portion of the local submatrix [m x (N-n)]
4307    constitute the OFF-DIAGONAL portion. The example below better
4308    illustrates this concept.
4309 
4310    For a square global matrix we define each processor's diagonal portion
4311    to be its local rows and the corresponding columns (a square submatrix);
4312    each processor's off-diagonal portion encompasses the remainder of the
4313    local matrix (a rectangular submatrix).
4314 
4315    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4316 
4317    When calling this routine with a single process communicator, a matrix of
4318    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4319    type of communicator, use the construction mechanism
4320 .vb
4321      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4322 .ve
4323 
4324 $     MatCreate(...,&A);
4325 $     MatSetType(A,MATMPIAIJ);
4326 $     MatSetSizes(A, m,n,M,N);
4327 $     MatMPIAIJSetPreallocation(A,...);
4328 
4329    By default, this format uses inodes (identical nodes) when possible.
4330    We search for consecutive rows with the same nonzero structure, thereby
4331    reusing matrix information to achieve increased efficiency.
4332 
4333    Options Database Keys:
4334 +  -mat_no_inode  - Do not use inodes
4335 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4336 
4337 
4338 
4339    Example usage:
4340 
4341    Consider the following 8x8 matrix with 34 non-zero values, that is
4342    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4343    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4344    as follows
4345 
4346 .vb
4347             1  2  0  |  0  3  0  |  0  4
4348     Proc0   0  5  6  |  7  0  0  |  8  0
4349             9  0 10  | 11  0  0  | 12  0
4350     -------------------------------------
4351            13  0 14  | 15 16 17  |  0  0
4352     Proc1   0 18  0  | 19 20 21  |  0  0
4353             0  0  0  | 22 23  0  | 24  0
4354     -------------------------------------
4355     Proc2  25 26 27  |  0  0 28  | 29  0
4356            30  0  0  | 31 32 33  |  0 34
4357 .ve
4358 
4359    This can be represented as a collection of submatrices as
4360 
4361 .vb
4362       A B C
4363       D E F
4364       G H I
4365 .ve
4366 
4367    Where the submatrices A,B,C are owned by proc0, D,E,F are
4368    owned by proc1, G,H,I are owned by proc2.
4369 
4370    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4371    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4372    The 'M','N' parameters are 8,8, and have the same values on all procs.
4373 
4374    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4375    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4376    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4377    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4378    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4379    matrix, and [DF] as another SeqAIJ matrix.
4380 
4381    When d_nz, o_nz parameters are specified, d_nz storage elements are
4382    allocated for every row of the local diagonal submatrix, and o_nz
4383    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4384    One way to choose d_nz and o_nz is to use the max nonzeros per local
4385    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4386    In this case, the values of d_nz,o_nz are
4387 .vb
4388      proc0 : d_nz = 2, o_nz = 2
4389      proc1 : d_nz = 3, o_nz = 2
4390      proc2 : d_nz = 1, o_nz = 4
4391 .ve
4392    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4393    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4394    for proc2. i.e we are using 12+15+10=37 storage locations to store
4395    34 values.
4396 
4397    When d_nnz, o_nnz parameters are specified, the storage is specified
4398    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4399    In the above case the values for d_nnz,o_nnz are
4400 .vb
4401      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4402      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4403      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4404 .ve
4405    Here the space allocated is sum of all the above values i.e 34, and
4406    hence pre-allocation is perfect.
4407 
4408    Level: intermediate
4409 
4410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4411           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4412 @*/
4413 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4414 {
4415   PetscErrorCode ierr;
4416   PetscMPIInt    size;
4417 
4418   PetscFunctionBegin;
4419   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4420   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4421   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4422   if (size > 1) {
4423     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4424     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4425   } else {
4426     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4427     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4428   }
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 /*@C
4433   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4434 
4435   Not collective
4436 
4437   Input Parameter:
4438 . A - The MPIAIJ matrix
4439 
4440   Output Parameters:
4441 + Ad - The local diagonal block as a SeqAIJ matrix
4442 . Ao - The local off-diagonal block as a SeqAIJ matrix
4443 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4444 
4445   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4446   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4447   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4448   local column numbers to global column numbers in the original matrix.
4449 
4450   Level: intermediate
4451 
4452 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4453 @*/
4454 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4455 {
4456   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4457   PetscBool      flg;
4458   PetscErrorCode ierr;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4462   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4463   if (Ad)     *Ad     = a->A;
4464   if (Ao)     *Ao     = a->B;
4465   if (colmap) *colmap = a->garray;
4466   PetscFunctionReturn(0);
4467 }
4468 
4469 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4470 {
4471   PetscErrorCode ierr;
4472   PetscInt       m,N,i,rstart,nnz,Ii;
4473   PetscInt       *indx;
4474   PetscScalar    *values;
4475 
4476   PetscFunctionBegin;
4477   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4478   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4479     PetscInt       *dnz,*onz,sum,bs,cbs;
4480 
4481     if (n == PETSC_DECIDE) {
4482       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4483     }
4484     /* Check sum(n) = N */
4485     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4486     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4487 
4488     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4489     rstart -= m;
4490 
4491     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4492     for (i=0; i<m; i++) {
4493       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4494       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4495       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4496     }
4497 
4498     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4499     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4500     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4501     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4502     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4503     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4504     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4505     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4506     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4507   }
4508 
4509   /* numeric phase */
4510   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4511   for (i=0; i<m; i++) {
4512     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4513     Ii   = i + rstart;
4514     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4515     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4516   }
4517   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4519   PetscFunctionReturn(0);
4520 }
4521 
4522 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4523 {
4524   PetscErrorCode    ierr;
4525   PetscMPIInt       rank;
4526   PetscInt          m,N,i,rstart,nnz;
4527   size_t            len;
4528   const PetscInt    *indx;
4529   PetscViewer       out;
4530   char              *name;
4531   Mat               B;
4532   const PetscScalar *values;
4533 
4534   PetscFunctionBegin;
4535   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4536   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4537   /* Should this be the type of the diagonal block of A? */
4538   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4539   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4540   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4541   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4542   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4543   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4544   for (i=0; i<m; i++) {
4545     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4546     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4547     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4548   }
4549   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4550   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551 
4552   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4553   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4554   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4555   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4556   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4557   ierr = PetscFree(name);CHKERRQ(ierr);
4558   ierr = MatView(B,out);CHKERRQ(ierr);
4559   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4560   ierr = MatDestroy(&B);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4565 {
4566   PetscErrorCode      ierr;
4567   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4568 
4569   PetscFunctionBegin;
4570   if (!merge) PetscFunctionReturn(0);
4571   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4572   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4573   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4574   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4575   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4576   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4577   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4578   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4582   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4583   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4584   ierr = PetscFree(merge);CHKERRQ(ierr);
4585   PetscFunctionReturn(0);
4586 }
4587 
4588 #include <../src/mat/utils/freespace.h>
4589 #include <petscbt.h>
4590 
4591 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4592 {
4593   PetscErrorCode      ierr;
4594   MPI_Comm            comm;
4595   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4596   PetscMPIInt         size,rank,taga,*len_s;
4597   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4598   PetscInt            proc,m;
4599   PetscInt            **buf_ri,**buf_rj;
4600   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4601   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4602   MPI_Request         *s_waits,*r_waits;
4603   MPI_Status          *status;
4604   MatScalar           *aa=a->a;
4605   MatScalar           **abuf_r,*ba_i;
4606   Mat_Merge_SeqsToMPI *merge;
4607   PetscContainer      container;
4608 
4609   PetscFunctionBegin;
4610   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4611   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4612 
4613   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4614   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4615 
4616   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4617   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4618   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4619 
4620   bi     = merge->bi;
4621   bj     = merge->bj;
4622   buf_ri = merge->buf_ri;
4623   buf_rj = merge->buf_rj;
4624 
4625   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4626   owners = merge->rowmap->range;
4627   len_s  = merge->len_s;
4628 
4629   /* send and recv matrix values */
4630   /*-----------------------------*/
4631   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4632   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4633 
4634   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4635   for (proc=0,k=0; proc<size; proc++) {
4636     if (!len_s[proc]) continue;
4637     i    = owners[proc];
4638     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4639     k++;
4640   }
4641 
4642   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4643   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4644   ierr = PetscFree(status);CHKERRQ(ierr);
4645 
4646   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4647   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4648 
4649   /* insert mat values of mpimat */
4650   /*----------------------------*/
4651   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4652   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4653 
4654   for (k=0; k<merge->nrecv; k++) {
4655     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4656     nrows       = *(buf_ri_k[k]);
4657     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4658     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4659   }
4660 
4661   /* set values of ba */
4662   m = merge->rowmap->n;
4663   for (i=0; i<m; i++) {
4664     arow = owners[rank] + i;
4665     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4666     bnzi = bi[i+1] - bi[i];
4667     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4668 
4669     /* add local non-zero vals of this proc's seqmat into ba */
4670     anzi   = ai[arow+1] - ai[arow];
4671     aj     = a->j + ai[arow];
4672     aa     = a->a + ai[arow];
4673     nextaj = 0;
4674     for (j=0; nextaj<anzi; j++) {
4675       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4676         ba_i[j] += aa[nextaj++];
4677       }
4678     }
4679 
4680     /* add received vals into ba */
4681     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4682       /* i-th row */
4683       if (i == *nextrow[k]) {
4684         anzi   = *(nextai[k]+1) - *nextai[k];
4685         aj     = buf_rj[k] + *(nextai[k]);
4686         aa     = abuf_r[k] + *(nextai[k]);
4687         nextaj = 0;
4688         for (j=0; nextaj<anzi; j++) {
4689           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4690             ba_i[j] += aa[nextaj++];
4691           }
4692         }
4693         nextrow[k]++; nextai[k]++;
4694       }
4695     }
4696     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4697   }
4698   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4699   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4700 
4701   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4702   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4703   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4704   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4705   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4706   PetscFunctionReturn(0);
4707 }
4708 
4709 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4710 {
4711   PetscErrorCode      ierr;
4712   Mat                 B_mpi;
4713   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4714   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4715   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4716   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4717   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4718   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4719   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4720   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4721   MPI_Status          *status;
4722   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4723   PetscBT             lnkbt;
4724   Mat_Merge_SeqsToMPI *merge;
4725   PetscContainer      container;
4726 
4727   PetscFunctionBegin;
4728   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4729 
4730   /* make sure it is a PETSc comm */
4731   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4732   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4733   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4734 
4735   ierr = PetscNew(&merge);CHKERRQ(ierr);
4736   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4737 
4738   /* determine row ownership */
4739   /*---------------------------------------------------------*/
4740   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4741   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4742   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4743   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4744   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4745   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4746   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4747 
4748   m      = merge->rowmap->n;
4749   owners = merge->rowmap->range;
4750 
4751   /* determine the number of messages to send, their lengths */
4752   /*---------------------------------------------------------*/
4753   len_s = merge->len_s;
4754 
4755   len          = 0; /* length of buf_si[] */
4756   merge->nsend = 0;
4757   for (proc=0; proc<size; proc++) {
4758     len_si[proc] = 0;
4759     if (proc == rank) {
4760       len_s[proc] = 0;
4761     } else {
4762       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4763       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4764     }
4765     if (len_s[proc]) {
4766       merge->nsend++;
4767       nrows = 0;
4768       for (i=owners[proc]; i<owners[proc+1]; i++) {
4769         if (ai[i+1] > ai[i]) nrows++;
4770       }
4771       len_si[proc] = 2*(nrows+1);
4772       len         += len_si[proc];
4773     }
4774   }
4775 
4776   /* determine the number and length of messages to receive for ij-structure */
4777   /*-------------------------------------------------------------------------*/
4778   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4779   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4780 
4781   /* post the Irecv of j-structure */
4782   /*-------------------------------*/
4783   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4784   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4785 
4786   /* post the Isend of j-structure */
4787   /*--------------------------------*/
4788   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4789 
4790   for (proc=0, k=0; proc<size; proc++) {
4791     if (!len_s[proc]) continue;
4792     i    = owners[proc];
4793     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4794     k++;
4795   }
4796 
4797   /* receives and sends of j-structure are complete */
4798   /*------------------------------------------------*/
4799   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4800   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4801 
4802   /* send and recv i-structure */
4803   /*---------------------------*/
4804   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4805   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4806 
4807   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4808   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4809   for (proc=0,k=0; proc<size; proc++) {
4810     if (!len_s[proc]) continue;
4811     /* form outgoing message for i-structure:
4812          buf_si[0]:                 nrows to be sent
4813                [1:nrows]:           row index (global)
4814                [nrows+1:2*nrows+1]: i-structure index
4815     */
4816     /*-------------------------------------------*/
4817     nrows       = len_si[proc]/2 - 1;
4818     buf_si_i    = buf_si + nrows+1;
4819     buf_si[0]   = nrows;
4820     buf_si_i[0] = 0;
4821     nrows       = 0;
4822     for (i=owners[proc]; i<owners[proc+1]; i++) {
4823       anzi = ai[i+1] - ai[i];
4824       if (anzi) {
4825         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4826         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4827         nrows++;
4828       }
4829     }
4830     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4831     k++;
4832     buf_si += len_si[proc];
4833   }
4834 
4835   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4836   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4837 
4838   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4839   for (i=0; i<merge->nrecv; i++) {
4840     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4841   }
4842 
4843   ierr = PetscFree(len_si);CHKERRQ(ierr);
4844   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4845   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4846   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4847   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4848   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4849   ierr = PetscFree(status);CHKERRQ(ierr);
4850 
4851   /* compute a local seq matrix in each processor */
4852   /*----------------------------------------------*/
4853   /* allocate bi array and free space for accumulating nonzero column info */
4854   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4855   bi[0] = 0;
4856 
4857   /* create and initialize a linked list */
4858   nlnk = N+1;
4859   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4860 
4861   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4862   len  = ai[owners[rank+1]] - ai[owners[rank]];
4863   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4864 
4865   current_space = free_space;
4866 
4867   /* determine symbolic info for each local row */
4868   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4869 
4870   for (k=0; k<merge->nrecv; k++) {
4871     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4872     nrows       = *buf_ri_k[k];
4873     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4874     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4875   }
4876 
4877   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4878   len  = 0;
4879   for (i=0; i<m; i++) {
4880     bnzi = 0;
4881     /* add local non-zero cols of this proc's seqmat into lnk */
4882     arow  = owners[rank] + i;
4883     anzi  = ai[arow+1] - ai[arow];
4884     aj    = a->j + ai[arow];
4885     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4886     bnzi += nlnk;
4887     /* add received col data into lnk */
4888     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4889       if (i == *nextrow[k]) { /* i-th row */
4890         anzi  = *(nextai[k]+1) - *nextai[k];
4891         aj    = buf_rj[k] + *nextai[k];
4892         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4893         bnzi += nlnk;
4894         nextrow[k]++; nextai[k]++;
4895       }
4896     }
4897     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4898 
4899     /* if free space is not available, make more free space */
4900     if (current_space->local_remaining<bnzi) {
4901       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4902       nspacedouble++;
4903     }
4904     /* copy data into free space, then initialize lnk */
4905     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4906     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4907 
4908     current_space->array           += bnzi;
4909     current_space->local_used      += bnzi;
4910     current_space->local_remaining -= bnzi;
4911 
4912     bi[i+1] = bi[i] + bnzi;
4913   }
4914 
4915   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4916 
4917   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4918   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4919   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4920 
4921   /* create symbolic parallel matrix B_mpi */
4922   /*---------------------------------------*/
4923   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4924   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4925   if (n==PETSC_DECIDE) {
4926     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4927   } else {
4928     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4929   }
4930   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4931   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4932   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4933   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4934   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4935 
4936   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4937   B_mpi->assembled  = PETSC_FALSE;
4938   merge->bi         = bi;
4939   merge->bj         = bj;
4940   merge->buf_ri     = buf_ri;
4941   merge->buf_rj     = buf_rj;
4942   merge->coi        = NULL;
4943   merge->coj        = NULL;
4944   merge->owners_co  = NULL;
4945 
4946   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4947 
4948   /* attach the supporting struct to B_mpi for reuse */
4949   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4950   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4951   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4952   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4953   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4954   *mpimat = B_mpi;
4955 
4956   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4957   PetscFunctionReturn(0);
4958 }
4959 
4960 /*@C
4961       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4962                  matrices from each processor
4963 
4964     Collective
4965 
4966    Input Parameters:
4967 +    comm - the communicators the parallel matrix will live on
4968 .    seqmat - the input sequential matrices
4969 .    m - number of local rows (or PETSC_DECIDE)
4970 .    n - number of local columns (or PETSC_DECIDE)
4971 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4972 
4973    Output Parameter:
4974 .    mpimat - the parallel matrix generated
4975 
4976     Level: advanced
4977 
4978    Notes:
4979      The dimensions of the sequential matrix in each processor MUST be the same.
4980      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4981      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4982 @*/
4983 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4984 {
4985   PetscErrorCode ierr;
4986   PetscMPIInt    size;
4987 
4988   PetscFunctionBegin;
4989   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4990   if (size == 1) {
4991     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4992     if (scall == MAT_INITIAL_MATRIX) {
4993       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4994     } else {
4995       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4996     }
4997     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4998     PetscFunctionReturn(0);
4999   }
5000   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5001   if (scall == MAT_INITIAL_MATRIX) {
5002     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5003   }
5004   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5005   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5006   PetscFunctionReturn(0);
5007 }
5008 
5009 /*@
5010      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5011           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5012           with MatGetSize()
5013 
5014     Not Collective
5015 
5016    Input Parameters:
5017 +    A - the matrix
5018 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5019 
5020    Output Parameter:
5021 .    A_loc - the local sequential matrix generated
5022 
5023     Level: developer
5024 
5025    Notes:
5026      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5027      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5028      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5029      modify the values of the returned A_loc.
5030 
5031 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5032 @*/
5033 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5034 {
5035   PetscErrorCode    ierr;
5036   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5037   Mat_SeqAIJ        *mat,*a,*b;
5038   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5039   const PetscScalar *aa,*ba,*aav,*bav;
5040   PetscScalar       *ca,*cam;
5041   PetscMPIInt       size;
5042   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5043   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5044   PetscBool         match;
5045 
5046   PetscFunctionBegin;
5047   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5048   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5049   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5050   if (size == 1) {
5051     if (scall == MAT_INITIAL_MATRIX) {
5052       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5053       *A_loc = mpimat->A;
5054     } else if (scall == MAT_REUSE_MATRIX) {
5055       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5056     }
5057     PetscFunctionReturn(0);
5058   }
5059 
5060   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5061   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5062   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5063   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5064   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5065   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5066   aa   = aav;
5067   ba   = bav;
5068   if (scall == MAT_INITIAL_MATRIX) {
5069     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5070     ci[0] = 0;
5071     for (i=0; i<am; i++) {
5072       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5073     }
5074     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5075     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5076     k    = 0;
5077     for (i=0; i<am; i++) {
5078       ncols_o = bi[i+1] - bi[i];
5079       ncols_d = ai[i+1] - ai[i];
5080       /* off-diagonal portion of A */
5081       for (jo=0; jo<ncols_o; jo++) {
5082         col = cmap[*bj];
5083         if (col >= cstart) break;
5084         cj[k]   = col; bj++;
5085         ca[k++] = *ba++;
5086       }
5087       /* diagonal portion of A */
5088       for (j=0; j<ncols_d; j++) {
5089         cj[k]   = cstart + *aj++;
5090         ca[k++] = *aa++;
5091       }
5092       /* off-diagonal portion of A */
5093       for (j=jo; j<ncols_o; j++) {
5094         cj[k]   = cmap[*bj++];
5095         ca[k++] = *ba++;
5096       }
5097     }
5098     /* put together the new matrix */
5099     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5100     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5101     /* Since these are PETSc arrays, change flags to free them as necessary. */
5102     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5103     mat->free_a  = PETSC_TRUE;
5104     mat->free_ij = PETSC_TRUE;
5105     mat->nonew   = 0;
5106   } else if (scall == MAT_REUSE_MATRIX) {
5107     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5108 #if defined(PETSC_USE_DEVICE)
5109     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5110 #endif
5111     ci = mat->i; cj = mat->j; cam = mat->a;
5112     for (i=0; i<am; i++) {
5113       /* off-diagonal portion of A */
5114       ncols_o = bi[i+1] - bi[i];
5115       for (jo=0; jo<ncols_o; jo++) {
5116         col = cmap[*bj];
5117         if (col >= cstart) break;
5118         *cam++ = *ba++; bj++;
5119       }
5120       /* diagonal portion of A */
5121       ncols_d = ai[i+1] - ai[i];
5122       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5123       /* off-diagonal portion of A */
5124       for (j=jo; j<ncols_o; j++) {
5125         *cam++ = *ba++; bj++;
5126       }
5127     }
5128   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5129   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5130   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5131   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5132   PetscFunctionReturn(0);
5133 }
5134 
5135 /*@
5136      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5137           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5138 
5139     Not Collective
5140 
5141    Input Parameters:
5142 +    A - the matrix
5143 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5144 
5145    Output Parameter:
5146 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5147 -    A_loc - the local sequential matrix generated
5148 
5149     Level: developer
5150 
5151    Notes:
5152      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5153 
5154 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5155 
5156 @*/
5157 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5158 {
5159   PetscErrorCode ierr;
5160   Mat            Ao,Ad;
5161   const PetscInt *cmap;
5162   PetscMPIInt    size;
5163   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5164 
5165   PetscFunctionBegin;
5166   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5167   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5168   if (size == 1) {
5169     if (scall == MAT_INITIAL_MATRIX) {
5170       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5171       *A_loc = Ad;
5172     } else if (scall == MAT_REUSE_MATRIX) {
5173       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5174     }
5175     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5176     PetscFunctionReturn(0);
5177   }
5178   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5179   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5180   if (f) {
5181     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5182   } else {
5183     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5184     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5185     Mat_SeqAIJ        *c;
5186     PetscInt          *ai = a->i, *aj = a->j;
5187     PetscInt          *bi = b->i, *bj = b->j;
5188     PetscInt          *ci,*cj;
5189     const PetscScalar *aa,*ba;
5190     PetscScalar       *ca;
5191     PetscInt          i,j,am,dn,on;
5192 
5193     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5194     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5195     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5196     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5197     if (scall == MAT_INITIAL_MATRIX) {
5198       PetscInt k;
5199       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5200       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5201       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5202       ci[0] = 0;
5203       for (i=0,k=0; i<am; i++) {
5204         const PetscInt ncols_o = bi[i+1] - bi[i];
5205         const PetscInt ncols_d = ai[i+1] - ai[i];
5206         ci[i+1] = ci[i] + ncols_o + ncols_d;
5207         /* diagonal portion of A */
5208         for (j=0; j<ncols_d; j++,k++) {
5209           cj[k] = *aj++;
5210           ca[k] = *aa++;
5211         }
5212         /* off-diagonal portion of A */
5213         for (j=0; j<ncols_o; j++,k++) {
5214           cj[k] = dn + *bj++;
5215           ca[k] = *ba++;
5216         }
5217       }
5218       /* put together the new matrix */
5219       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5220       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5221       /* Since these are PETSc arrays, change flags to free them as necessary. */
5222       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5223       c->free_a  = PETSC_TRUE;
5224       c->free_ij = PETSC_TRUE;
5225       c->nonew   = 0;
5226       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5227     } else if (scall == MAT_REUSE_MATRIX) {
5228 #if defined(PETSC_HAVE_DEVICE)
5229       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5230 #endif
5231       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5232       ca = c->a;
5233       for (i=0; i<am; i++) {
5234         const PetscInt ncols_d = ai[i+1] - ai[i];
5235         const PetscInt ncols_o = bi[i+1] - bi[i];
5236         /* diagonal portion of A */
5237         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5238         /* off-diagonal portion of A */
5239         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5240       }
5241     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5242     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5243     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5244     if (glob) {
5245       PetscInt cst, *gidx;
5246 
5247       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5248       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5249       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5250       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5251       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5252     }
5253   }
5254   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5255   PetscFunctionReturn(0);
5256 }
5257 
5258 /*@C
5259      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5260 
5261     Not Collective
5262 
5263    Input Parameters:
5264 +    A - the matrix
5265 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5266 -    row, col - index sets of rows and columns to extract (or NULL)
5267 
5268    Output Parameter:
5269 .    A_loc - the local sequential matrix generated
5270 
5271     Level: developer
5272 
5273 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5274 
5275 @*/
5276 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5277 {
5278   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5279   PetscErrorCode ierr;
5280   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5281   IS             isrowa,iscola;
5282   Mat            *aloc;
5283   PetscBool      match;
5284 
5285   PetscFunctionBegin;
5286   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5287   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5288   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5289   if (!row) {
5290     start = A->rmap->rstart; end = A->rmap->rend;
5291     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5292   } else {
5293     isrowa = *row;
5294   }
5295   if (!col) {
5296     start = A->cmap->rstart;
5297     cmap  = a->garray;
5298     nzA   = a->A->cmap->n;
5299     nzB   = a->B->cmap->n;
5300     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5301     ncols = 0;
5302     for (i=0; i<nzB; i++) {
5303       if (cmap[i] < start) idx[ncols++] = cmap[i];
5304       else break;
5305     }
5306     imark = i;
5307     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5308     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5309     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5310   } else {
5311     iscola = *col;
5312   }
5313   if (scall != MAT_INITIAL_MATRIX) {
5314     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5315     aloc[0] = *A_loc;
5316   }
5317   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5318   if (!col) { /* attach global id of condensed columns */
5319     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5320   }
5321   *A_loc = aloc[0];
5322   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5323   if (!row) {
5324     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5325   }
5326   if (!col) {
5327     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5328   }
5329   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5330   PetscFunctionReturn(0);
5331 }
5332 
5333 /*
5334  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5335  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5336  * on a global size.
5337  * */
5338 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5339 {
5340   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5341   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5342   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5343   PetscMPIInt              owner;
5344   PetscSFNode              *iremote,*oiremote;
5345   const PetscInt           *lrowindices;
5346   PetscErrorCode           ierr;
5347   PetscSF                  sf,osf;
5348   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5349   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5350   MPI_Comm                 comm;
5351   ISLocalToGlobalMapping   mapping;
5352 
5353   PetscFunctionBegin;
5354   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5355   /* plocalsize is the number of roots
5356    * nrows is the number of leaves
5357    * */
5358   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5359   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5360   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5361   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5362   for (i=0;i<nrows;i++) {
5363     /* Find a remote index and an owner for a row
5364      * The row could be local or remote
5365      * */
5366     owner = 0;
5367     lidx  = 0;
5368     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5369     iremote[i].index = lidx;
5370     iremote[i].rank  = owner;
5371   }
5372   /* Create SF to communicate how many nonzero columns for each row */
5373   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5374   /* SF will figure out the number of nonzero colunms for each row, and their
5375    * offsets
5376    * */
5377   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5378   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5379   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5380 
5381   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5382   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5383   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5384   roffsets[0] = 0;
5385   roffsets[1] = 0;
5386   for (i=0;i<plocalsize;i++) {
5387     /* diag */
5388     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5389     /* off diag */
5390     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5391     /* compute offsets so that we relative location for each row */
5392     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5393     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5394   }
5395   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5396   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5397   /* 'r' means root, and 'l' means leaf */
5398   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5399   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5400   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5401   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5402   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5403   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5404   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5405   dntotalcols = 0;
5406   ontotalcols = 0;
5407   ncol = 0;
5408   for (i=0;i<nrows;i++) {
5409     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5410     ncol = PetscMax(pnnz[i],ncol);
5411     /* diag */
5412     dntotalcols += nlcols[i*2+0];
5413     /* off diag */
5414     ontotalcols += nlcols[i*2+1];
5415   }
5416   /* We do not need to figure the right number of columns
5417    * since all the calculations will be done by going through the raw data
5418    * */
5419   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5420   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5421   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5422   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5423   /* diag */
5424   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5425   /* off diag */
5426   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5427   /* diag */
5428   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5429   /* off diag */
5430   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5431   dntotalcols = 0;
5432   ontotalcols = 0;
5433   ntotalcols  = 0;
5434   for (i=0;i<nrows;i++) {
5435     owner = 0;
5436     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5437     /* Set iremote for diag matrix */
5438     for (j=0;j<nlcols[i*2+0];j++) {
5439       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5440       iremote[dntotalcols].rank    = owner;
5441       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5442       ilocal[dntotalcols++]        = ntotalcols++;
5443     }
5444     /* off diag */
5445     for (j=0;j<nlcols[i*2+1];j++) {
5446       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5447       oiremote[ontotalcols].rank    = owner;
5448       oilocal[ontotalcols++]        = ntotalcols++;
5449     }
5450   }
5451   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5452   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5453   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5454   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5455   /* P serves as roots and P_oth is leaves
5456    * Diag matrix
5457    * */
5458   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5459   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5460   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5461 
5462   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5463   /* Off diag */
5464   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5465   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5466   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5467   /* We operate on the matrix internal data for saving memory */
5468   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5469   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5470   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5471   /* Convert to global indices for diag matrix */
5472   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5473   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5474   /* We want P_oth store global indices */
5475   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5476   /* Use memory scalable approach */
5477   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5478   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5479   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5480   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5481   /* Convert back to local indices */
5482   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5483   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5484   nout = 0;
5485   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5486   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5487   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5488   /* Exchange values */
5489   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5490   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5491   /* Stop PETSc from shrinking memory */
5492   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5493   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5495   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5496   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5497   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5498   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5499   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5500   PetscFunctionReturn(0);
5501 }
5502 
5503 /*
5504  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5505  * This supports MPIAIJ and MAIJ
5506  * */
5507 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5508 {
5509   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5510   Mat_SeqAIJ            *p_oth;
5511   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5512   IS                    rows,map;
5513   PetscHMapI            hamp;
5514   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5515   MPI_Comm              comm;
5516   PetscSF               sf,osf;
5517   PetscBool             has;
5518   PetscErrorCode        ierr;
5519 
5520   PetscFunctionBegin;
5521   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5522   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5523   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5524    *  and then create a submatrix (that often is an overlapping matrix)
5525    * */
5526   if (reuse == MAT_INITIAL_MATRIX) {
5527     /* Use a hash table to figure out unique keys */
5528     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5529     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5530     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5531     count = 0;
5532     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5533     for (i=0;i<a->B->cmap->n;i++) {
5534       key  = a->garray[i]/dof;
5535       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5536       if (!has) {
5537         mapping[i] = count;
5538         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5539       } else {
5540         /* Current 'i' has the same value the previous step */
5541         mapping[i] = count-1;
5542       }
5543     }
5544     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5545     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5546     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5547     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5548     off = 0;
5549     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5550     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5551     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5552     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5553     /* In case, the matrix was already created but users want to recreate the matrix */
5554     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5555     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5556     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5557     ierr = ISDestroy(&map);CHKERRQ(ierr);
5558     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5559   } else if (reuse == MAT_REUSE_MATRIX) {
5560     /* If matrix was already created, we simply update values using SF objects
5561      * that as attached to the matrix ealier.
5562      *  */
5563     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5564     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5565     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5566     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5567     /* Update values in place */
5568     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5569     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5570     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5571     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5572   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5573   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5574   PetscFunctionReturn(0);
5575 }
5576 
5577 /*@C
5578     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5579 
5580     Collective on Mat
5581 
5582    Input Parameters:
5583 +    A,B - the matrices in mpiaij format
5584 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5585 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5586 
5587    Output Parameter:
5588 +    rowb, colb - index sets of rows and columns of B to extract
5589 -    B_seq - the sequential matrix generated
5590 
5591     Level: developer
5592 
5593 @*/
5594 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5595 {
5596   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5597   PetscErrorCode ierr;
5598   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5599   IS             isrowb,iscolb;
5600   Mat            *bseq=NULL;
5601 
5602   PetscFunctionBegin;
5603   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5604     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5605   }
5606   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5607 
5608   if (scall == MAT_INITIAL_MATRIX) {
5609     start = A->cmap->rstart;
5610     cmap  = a->garray;
5611     nzA   = a->A->cmap->n;
5612     nzB   = a->B->cmap->n;
5613     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5614     ncols = 0;
5615     for (i=0; i<nzB; i++) {  /* row < local row index */
5616       if (cmap[i] < start) idx[ncols++] = cmap[i];
5617       else break;
5618     }
5619     imark = i;
5620     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5621     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5622     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5623     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5624   } else {
5625     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5626     isrowb  = *rowb; iscolb = *colb;
5627     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5628     bseq[0] = *B_seq;
5629   }
5630   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5631   *B_seq = bseq[0];
5632   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5633   if (!rowb) {
5634     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5635   } else {
5636     *rowb = isrowb;
5637   }
5638   if (!colb) {
5639     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5640   } else {
5641     *colb = iscolb;
5642   }
5643   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5644   PetscFunctionReturn(0);
5645 }
5646 
5647 /*
5648     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5649     of the OFF-DIAGONAL portion of local A
5650 
5651     Collective on Mat
5652 
5653    Input Parameters:
5654 +    A,B - the matrices in mpiaij format
5655 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5656 
5657    Output Parameter:
5658 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5659 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5660 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5661 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5662 
5663     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5664      for this matrix. This is not desirable..
5665 
5666     Level: developer
5667 
5668 */
5669 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5670 {
5671   PetscErrorCode         ierr;
5672   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5673   Mat_SeqAIJ             *b_oth;
5674   VecScatter             ctx;
5675   MPI_Comm               comm;
5676   const PetscMPIInt      *rprocs,*sprocs;
5677   const PetscInt         *srow,*rstarts,*sstarts;
5678   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5679   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5680   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5681   MPI_Request            *rwaits = NULL,*swaits = NULL;
5682   MPI_Status             rstatus;
5683   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5684   PETSC_UNUSED PetscMPIInt jj;
5685 
5686   PetscFunctionBegin;
5687   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5688   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5689 
5690   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5691     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5692   }
5693   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5694   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5695 
5696   if (size == 1) {
5697     startsj_s = NULL;
5698     bufa_ptr  = NULL;
5699     *B_oth    = NULL;
5700     PetscFunctionReturn(0);
5701   }
5702 
5703   ctx = a->Mvctx;
5704   tag = ((PetscObject)ctx)->tag;
5705 
5706   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5707   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5708   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5709   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5710   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5711   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5712 
5713   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5714   if (scall == MAT_INITIAL_MATRIX) {
5715     /* i-array */
5716     /*---------*/
5717     /*  post receives */
5718     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5719     for (i=0; i<nrecvs; i++) {
5720       rowlen = rvalues + rstarts[i]*rbs;
5721       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5722       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5723     }
5724 
5725     /* pack the outgoing message */
5726     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5727 
5728     sstartsj[0] = 0;
5729     rstartsj[0] = 0;
5730     len         = 0; /* total length of j or a array to be sent */
5731     if (nsends) {
5732       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5733       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5734     }
5735     for (i=0; i<nsends; i++) {
5736       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5737       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5738       for (j=0; j<nrows; j++) {
5739         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5740         for (l=0; l<sbs; l++) {
5741           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5742 
5743           rowlen[j*sbs+l] = ncols;
5744 
5745           len += ncols;
5746           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5747         }
5748         k++;
5749       }
5750       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5751 
5752       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5753     }
5754     /* recvs and sends of i-array are completed */
5755     i = nrecvs;
5756     while (i--) {
5757       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5758     }
5759     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5760     ierr = PetscFree(svalues);CHKERRQ(ierr);
5761 
5762     /* allocate buffers for sending j and a arrays */
5763     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5764     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5765 
5766     /* create i-array of B_oth */
5767     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5768 
5769     b_othi[0] = 0;
5770     len       = 0; /* total length of j or a array to be received */
5771     k         = 0;
5772     for (i=0; i<nrecvs; i++) {
5773       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5774       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5775       for (j=0; j<nrows; j++) {
5776         b_othi[k+1] = b_othi[k] + rowlen[j];
5777         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5778         k++;
5779       }
5780       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5781     }
5782     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5783 
5784     /* allocate space for j and a arrrays of B_oth */
5785     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5786     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5787 
5788     /* j-array */
5789     /*---------*/
5790     /*  post receives of j-array */
5791     for (i=0; i<nrecvs; i++) {
5792       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5793       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5794     }
5795 
5796     /* pack the outgoing message j-array */
5797     if (nsends) k = sstarts[0];
5798     for (i=0; i<nsends; i++) {
5799       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5800       bufJ  = bufj+sstartsj[i];
5801       for (j=0; j<nrows; j++) {
5802         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5803         for (ll=0; ll<sbs; ll++) {
5804           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5805           for (l=0; l<ncols; l++) {
5806             *bufJ++ = cols[l];
5807           }
5808           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5809         }
5810       }
5811       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5812     }
5813 
5814     /* recvs and sends of j-array are completed */
5815     i = nrecvs;
5816     while (i--) {
5817       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5818     }
5819     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5820   } else if (scall == MAT_REUSE_MATRIX) {
5821     sstartsj = *startsj_s;
5822     rstartsj = *startsj_r;
5823     bufa     = *bufa_ptr;
5824     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5825     b_otha   = b_oth->a;
5826 #if defined(PETSC_HAVE_DEVICE)
5827     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5828 #endif
5829   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5830 
5831   /* a-array */
5832   /*---------*/
5833   /*  post receives of a-array */
5834   for (i=0; i<nrecvs; i++) {
5835     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5836     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5837   }
5838 
5839   /* pack the outgoing message a-array */
5840   if (nsends) k = sstarts[0];
5841   for (i=0; i<nsends; i++) {
5842     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5843     bufA  = bufa+sstartsj[i];
5844     for (j=0; j<nrows; j++) {
5845       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5846       for (ll=0; ll<sbs; ll++) {
5847         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5848         for (l=0; l<ncols; l++) {
5849           *bufA++ = vals[l];
5850         }
5851         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5852       }
5853     }
5854     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5855   }
5856   /* recvs and sends of a-array are completed */
5857   i = nrecvs;
5858   while (i--) {
5859     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5860   }
5861   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5862   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5863 
5864   if (scall == MAT_INITIAL_MATRIX) {
5865     /* put together the new matrix */
5866     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5867 
5868     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5869     /* Since these are PETSc arrays, change flags to free them as necessary. */
5870     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5871     b_oth->free_a  = PETSC_TRUE;
5872     b_oth->free_ij = PETSC_TRUE;
5873     b_oth->nonew   = 0;
5874 
5875     ierr = PetscFree(bufj);CHKERRQ(ierr);
5876     if (!startsj_s || !bufa_ptr) {
5877       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5878       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5879     } else {
5880       *startsj_s = sstartsj;
5881       *startsj_r = rstartsj;
5882       *bufa_ptr  = bufa;
5883     }
5884   }
5885 
5886   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5887   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5888   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5889   PetscFunctionReturn(0);
5890 }
5891 
5892 /*@C
5893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5894 
5895   Not Collective
5896 
5897   Input Parameters:
5898 . A - The matrix in mpiaij format
5899 
5900   Output Parameter:
5901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5902 . colmap - A map from global column index to local index into lvec
5903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5904 
5905   Level: developer
5906 
5907 @*/
5908 #if defined(PETSC_USE_CTABLE)
5909 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5910 #else
5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5912 #endif
5913 {
5914   Mat_MPIAIJ *a;
5915 
5916   PetscFunctionBegin;
5917   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5918   PetscValidPointer(lvec, 2);
5919   PetscValidPointer(colmap, 3);
5920   PetscValidPointer(multScatter, 4);
5921   a = (Mat_MPIAIJ*) A->data;
5922   if (lvec) *lvec = a->lvec;
5923   if (colmap) *colmap = a->colmap;
5924   if (multScatter) *multScatter = a->Mvctx;
5925   PetscFunctionReturn(0);
5926 }
5927 
5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5931 #if defined(PETSC_HAVE_MKL_SPARSE)
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5933 #endif
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5936 #if defined(PETSC_HAVE_ELEMENTAL)
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5938 #endif
5939 #if defined(PETSC_HAVE_SCALAPACK)
5940 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5941 #endif
5942 #if defined(PETSC_HAVE_HYPRE)
5943 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5944 #endif
5945 #if defined(PETSC_HAVE_CUDA)
5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5947 #endif
5948 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5949 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5950 #endif
5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5952 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5953 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5954 
5955 /*
5956     Computes (B'*A')' since computing B*A directly is untenable
5957 
5958                n                       p                          p
5959         [             ]       [             ]         [                 ]
5960       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5961         [             ]       [             ]         [                 ]
5962 
5963 */
5964 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5965 {
5966   PetscErrorCode ierr;
5967   Mat            At,Bt,Ct;
5968 
5969   PetscFunctionBegin;
5970   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5971   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5972   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5973   ierr = MatDestroy(&At);CHKERRQ(ierr);
5974   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5975   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5976   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5977   PetscFunctionReturn(0);
5978 }
5979 
5980 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5981 {
5982   PetscErrorCode ierr;
5983   PetscBool      cisdense;
5984 
5985   PetscFunctionBegin;
5986   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5987   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5988   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5989   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5990   if (!cisdense) {
5991     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5992   }
5993   ierr = MatSetUp(C);CHKERRQ(ierr);
5994 
5995   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5996   PetscFunctionReturn(0);
5997 }
5998 
5999 /* ----------------------------------------------------------------*/
6000 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6001 {
6002   Mat_Product *product = C->product;
6003   Mat         A = product->A,B=product->B;
6004 
6005   PetscFunctionBegin;
6006   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6007     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6008 
6009   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6010   C->ops->productsymbolic = MatProductSymbolic_AB;
6011   PetscFunctionReturn(0);
6012 }
6013 
6014 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6015 {
6016   PetscErrorCode ierr;
6017   Mat_Product    *product = C->product;
6018 
6019   PetscFunctionBegin;
6020   if (product->type == MATPRODUCT_AB) {
6021     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6022   }
6023   PetscFunctionReturn(0);
6024 }
6025 /* ----------------------------------------------------------------*/
6026 
6027 /*MC
6028    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6029 
6030    Options Database Keys:
6031 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6032 
6033    Level: beginner
6034 
6035    Notes:
6036     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6037     in this case the values associated with the rows and columns one passes in are set to zero
6038     in the matrix
6039 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6042 
6043 .seealso: MatCreateAIJ()
6044 M*/
6045 
6046 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6047 {
6048   Mat_MPIAIJ     *b;
6049   PetscErrorCode ierr;
6050   PetscMPIInt    size;
6051 
6052   PetscFunctionBegin;
6053   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6054 
6055   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6056   B->data       = (void*)b;
6057   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6058   B->assembled  = PETSC_FALSE;
6059   B->insertmode = NOT_SET_VALUES;
6060   b->size       = size;
6061 
6062   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6063 
6064   /* build cache for off array entries formed */
6065   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6066 
6067   b->donotstash  = PETSC_FALSE;
6068   b->colmap      = NULL;
6069   b->garray      = NULL;
6070   b->roworiented = PETSC_TRUE;
6071 
6072   /* stuff used for matrix vector multiply */
6073   b->lvec  = NULL;
6074   b->Mvctx = NULL;
6075 
6076   /* stuff for MatGetRow() */
6077   b->rowindices   = NULL;
6078   b->rowvalues    = NULL;
6079   b->getrowactive = PETSC_FALSE;
6080 
6081   /* flexible pointer used in CUSPARSE classes */
6082   b->spptr = NULL;
6083 
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6094 #if defined(PETSC_HAVE_CUDA)
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6096 #endif
6097 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6099 #endif
6100 #if defined(PETSC_HAVE_MKL_SPARSE)
6101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6102 #endif
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6106 #if defined(PETSC_HAVE_ELEMENTAL)
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6108 #endif
6109 #if defined(PETSC_HAVE_SCALAPACK)
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6111 #endif
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6114 #if defined(PETSC_HAVE_HYPRE)
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6117 #endif
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6120   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6121   PetscFunctionReturn(0);
6122 }
6123 
6124 /*@C
6125      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6126          and "off-diagonal" part of the matrix in CSR format.
6127 
6128    Collective
6129 
6130    Input Parameters:
6131 +  comm - MPI communicator
6132 .  m - number of local rows (Cannot be PETSC_DECIDE)
6133 .  n - This value should be the same as the local size used in creating the
6134        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6135        calculated if N is given) For square matrices n is almost always m.
6136 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6137 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6138 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6139 .   j - column indices
6140 .   a - matrix values
6141 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6142 .   oj - column indices
6143 -   oa - matrix values
6144 
6145    Output Parameter:
6146 .   mat - the matrix
6147 
6148    Level: advanced
6149 
6150    Notes:
6151        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6152        must free the arrays once the matrix has been destroyed and not before.
6153 
6154        The i and j indices are 0 based
6155 
6156        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6157 
6158        This sets local rows and cannot be used to set off-processor values.
6159 
6160        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6161        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6162        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6163        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6164        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6165        communication if it is known that only local entries will be set.
6166 
6167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6168           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6169 @*/
6170 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6171 {
6172   PetscErrorCode ierr;
6173   Mat_MPIAIJ     *maij;
6174 
6175   PetscFunctionBegin;
6176   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6177   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6178   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6179   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6180   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6181   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6182   maij = (Mat_MPIAIJ*) (*mat)->data;
6183 
6184   (*mat)->preallocated = PETSC_TRUE;
6185 
6186   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6187   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6188 
6189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6190   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6191 
6192   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6193   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6194   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6195   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6196 
6197   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6198   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6199   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6200   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6201   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6202   PetscFunctionReturn(0);
6203 }
6204 
6205 /*
6206     Special version for direct calls from Fortran
6207 */
6208 #include <petsc/private/fortranimpl.h>
6209 
/* Redefine these error macros so that they can be used in a function returning void */
6211 #undef CHKERRQ
6212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6213 #undef SETERRQ2
6214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6215 #undef SETERRQ3
6216 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6217 #undef SETERRQ
6218 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6219 
6220 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6221 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6222 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6223 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6224 #else
6225 #endif
6226 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6227 {
6228   Mat            mat  = *mmat;
6229   PetscInt       m    = *mm, n = *mn;
6230   InsertMode     addv = *maddv;
6231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6232   PetscScalar    value;
6233   PetscErrorCode ierr;
6234 
6235   MatCheckPreallocated(mat,1);
6236   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6237   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6238   {
6239     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6240     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6241     PetscBool roworiented = aij->roworiented;
6242 
6243     /* Some Variables required in the macro */
6244     Mat        A                    = aij->A;
6245     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6246     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6247     MatScalar  *aa                  = a->a;
6248     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6249     Mat        B                    = aij->B;
6250     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6251     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6252     MatScalar  *ba                  = b->a;
6253     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6254      * cannot use "#if defined" inside a macro. */
6255     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6256 
6257     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6258     PetscInt  nonew = a->nonew;
6259     MatScalar *ap1,*ap2;
6260 
6261     PetscFunctionBegin;
6262     for (i=0; i<m; i++) {
6263       if (im[i] < 0) continue;
6264       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6265       if (im[i] >= rstart && im[i] < rend) {
6266         row      = im[i] - rstart;
6267         lastcol1 = -1;
6268         rp1      = aj + ai[row];
6269         ap1      = aa + ai[row];
6270         rmax1    = aimax[row];
6271         nrow1    = ailen[row];
6272         low1     = 0;
6273         high1    = nrow1;
6274         lastcol2 = -1;
6275         rp2      = bj + bi[row];
6276         ap2      = ba + bi[row];
6277         rmax2    = bimax[row];
6278         nrow2    = bilen[row];
6279         low2     = 0;
6280         high2    = nrow2;
6281 
6282         for (j=0; j<n; j++) {
6283           if (roworiented) value = v[i*n+j];
6284           else value = v[i+j*m];
6285           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6286           if (in[j] >= cstart && in[j] < cend) {
6287             col = in[j] - cstart;
6288             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6289 #if defined(PETSC_HAVE_DEVICE)
6290             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6291 #endif
6292           } else if (in[j] < 0) continue;
6293           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6294             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6295             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6296           } else {
6297             if (mat->was_assembled) {
6298               if (!aij->colmap) {
6299                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6300               }
6301 #if defined(PETSC_USE_CTABLE)
6302               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6303               col--;
6304 #else
6305               col = aij->colmap[in[j]] - 1;
6306 #endif
6307               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6308                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6309                 col  =  in[j];
6310                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6311                 B        = aij->B;
6312                 b        = (Mat_SeqAIJ*)B->data;
6313                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6314                 rp2      = bj + bi[row];
6315                 ap2      = ba + bi[row];
6316                 rmax2    = bimax[row];
6317                 nrow2    = bilen[row];
6318                 low2     = 0;
6319                 high2    = nrow2;
6320                 bm       = aij->B->rmap->n;
6321                 ba       = b->a;
6322                 inserted = PETSC_FALSE;
6323               }
6324             } else col = in[j];
6325             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6326 #if defined(PETSC_HAVE_DEVICE)
6327             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6328 #endif
6329           }
6330         }
6331       } else if (!aij->donotstash) {
6332         if (roworiented) {
6333           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6334         } else {
6335           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6336         }
6337       }
6338     }
6339   }
6340   PetscFunctionReturnVoid();
6341 }
6342 
6343 typedef struct {
6344   Mat       *mp;    /* intermediate products */
6345   PetscBool *mptmp; /* is the intermediate product temporary ? */
6346   PetscInt  cp;     /* number of intermediate products */
6347 
6348   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6349   PetscInt    *startsj_s,*startsj_r;
6350   PetscScalar *bufa;
6351   Mat         P_oth;
6352 
6353   /* may take advantage of merging product->B */
6354   Mat Bloc;
6355 
6356   /* cusparse does not have support to split between symbolic and numeric phases
6357      When api_user is true, we don't need to update the numerical values
6358      of the temporary storage */
6359   PetscBool reusesym;
6360 
6361   /* support for COO values insertion */
6362   PetscScalar  *coo_v,*coo_w;
6363   PetscInt     **own;
6364   PetscInt     **off;
6365   PetscBool    hasoffproc; /* if true, non-local values insertion (i.e. AtB or PtAP) */
6366   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6367   PetscMemType mtype;
6368 
6369   /* customization */
6370   PetscBool abmerge;
6371   PetscBool P_oth_bind;
6372 } MatMatMPIAIJBACKEND;
6373 
6374 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6375 {
6376   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6377   PetscInt            i;
6378   PetscErrorCode      ierr;
6379 
6380   PetscFunctionBegin;
6381   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6382   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6383   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6384   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6385   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6386   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6387   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6388   for (i = 0; i < mmdata->cp; i++) {
6389     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6390   }
6391   ierr = PetscFree(mmdata->mp);CHKERRQ(ierr);
6392   ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr);
6393   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6394   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6398   PetscFunctionReturn(0);
6399 }
6400 
6401 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6402 {
6403   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6404   PetscErrorCode ierr;
6405 
6406   PetscFunctionBegin;
6407   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6408   if (f) {
6409     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6410   } else {
6411     const PetscScalar *vv;
6412 
6413     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6414     if (n && idx) {
6415       PetscScalar    *w = v;
6416       const PetscInt *oi = idx;
6417       PetscInt       j;
6418 
6419       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6420     } else {
6421       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6422     }
6423     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6424   }
6425   PetscFunctionReturn(0);
6426 }
6427 
6428 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6429 {
6430   MatMatMPIAIJBACKEND *mmdata;
6431   PetscInt            i,n_d,n_o;
6432   PetscErrorCode      ierr;
6433 
6434   PetscFunctionBegin;
6435   MatCheckProduct(C,1);
6436   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6437   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6438   if (!mmdata->reusesym) { /* update temporary matrices */
6439     if (mmdata->P_oth) {
6440       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6441     }
6442     if (mmdata->Bloc) {
6443       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6444     }
6445   }
6446   mmdata->reusesym = PETSC_FALSE;
6447 
6448   for (i = 0; i < mmdata->cp; i++) {
6449     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6450     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6451   }
6452   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6453     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6454 
6455     if (mmdata->mptmp[i]) continue;
6456     if (noff) {
6457       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6458 
6459       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6460       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6461       n_o += noff;
6462       n_d += nown;
6463     } else {
6464       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6465 
6466       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6467       n_d += mm->nz;
6468     }
6469   }
6470   if (mmdata->hasoffproc) { /* offprocess insertion */
6471     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6472     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6473   }
6474   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6475   PetscFunctionReturn(0);
6476 }
6477 
6478 /* Support for Pt * A, A * P, or Pt * A * P */
6479 #define MAX_NUMBER_INTERMEDIATE 4
6480 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6481 {
6482   Mat_Product            *product = C->product;
6483   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
6484   Mat_MPIAIJ             *a,*p;
6485   MatMatMPIAIJBACKEND    *mmdata;
6486   ISLocalToGlobalMapping P_oth_l2g = NULL;
6487   IS                     glob = NULL;
6488   const char             *prefix;
6489   char                   pprefix[256];
6490   const PetscInt         *globidx,*P_oth_idx;
6491   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
6492   PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
6493   MatProductType         ptype;
6494   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6495   PetscMPIInt            size;
6496   PetscErrorCode         ierr;
6497 
6498   PetscFunctionBegin;
6499   MatCheckProduct(C,1);
6500   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6501   ptype = product->type;
6502   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6503   switch (ptype) {
6504   case MATPRODUCT_AB:
6505     A = product->A;
6506     P = product->B;
6507     m = A->rmap->n;
6508     n = P->cmap->n;
6509     M = A->rmap->N;
6510     N = P->cmap->N;
6511     break;
6512   case MATPRODUCT_AtB:
6513     P = product->A;
6514     A = product->B;
6515     m = P->cmap->n;
6516     n = A->cmap->n;
6517     M = P->cmap->N;
6518     N = A->cmap->N;
6519     hasoffproc = PETSC_TRUE;
6520     break;
6521   case MATPRODUCT_PtAP:
6522     A = product->A;
6523     P = product->B;
6524     m = P->cmap->n;
6525     n = P->cmap->n;
6526     M = P->cmap->N;
6527     N = P->cmap->N;
6528     hasoffproc = PETSC_TRUE;
6529     break;
6530   default:
6531     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6532   }
6533   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6534   if (size == 1) hasoffproc = PETSC_FALSE;
6535 
6536   /* defaults */
6537   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6538     mp[i]    = NULL;
6539     mptmp[i] = PETSC_FALSE;
6540     rmapt[i] = -1;
6541     cmapt[i] = -1;
6542     rmapa[i] = NULL;
6543     cmapa[i] = NULL;
6544   }
6545 
6546   /* customization */
6547   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6548   mmdata->reusesym = product->api_user;
6549   if (ptype == MATPRODUCT_AB) {
6550     if (product->api_user) {
6551       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6552       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6553       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6554       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6555     } else {
6556       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6557       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6558       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6559       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6560     }
6561   } else if (ptype == MATPRODUCT_PtAP) {
6562     if (product->api_user) {
6563       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6564       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6565       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6566     } else {
6567       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6568       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6569       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6570     }
6571   }
6572   a = (Mat_MPIAIJ*)A->data;
6573   p = (Mat_MPIAIJ*)P->data;
6574   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6575   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6576   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6577   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6578   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6579   switch (ptype) {
6580   case MATPRODUCT_AB: /* A * P */
6581     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6582 
6583     if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
6584       /* P is product->B */
6585       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6586       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6587       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6588       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6589       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6590       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6591       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6592       mp[cp]->product->api_user = product->api_user;
6593       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6594       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6595       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6596       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6597       rmapt[cp] = 1;
6598       cmapt[cp] = 2;
6599       cmapa[cp] = globidx;
6600       mptmp[cp] = PETSC_FALSE;
6601       cp++;
6602     } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
6603       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6604       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6605       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6606       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6607       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6608       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6609       mp[cp]->product->api_user = product->api_user;
6610       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6611       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6612       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6613       rmapt[cp] = 1;
6614       cmapt[cp] = 1;
6615       mptmp[cp] = PETSC_FALSE;
6616       cp++;
6617       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6618       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6619       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6620       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6621       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6622       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6623       mp[cp]->product->api_user = product->api_user;
6624       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6625       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6626       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6627       rmapt[cp] = 1;
6628       cmapt[cp] = 2;
6629       cmapa[cp] = p->garray;
6630       mptmp[cp] = PETSC_FALSE;
6631       cp++;
6632     }
6633     if (mmdata->P_oth) {
6634       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6635       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6636       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6637       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6638       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6639       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6640       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6641       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6642       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6643       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6644       mp[cp]->product->api_user = product->api_user;
6645       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6646       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6647       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6648       rmapt[cp] = 1;
6649       cmapt[cp] = 2;
6650       cmapa[cp] = P_oth_idx;
6651       mptmp[cp] = PETSC_FALSE;
6652       cp++;
6653     }
6654     break;
6655   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6656     /* A is product->B */
6657     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6658     if (A == P) {
6659       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6660       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6661       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6662       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6663       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6664       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6665       mp[cp]->product->api_user = product->api_user;
6666       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6667       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6668       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6669       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6670       rmapt[cp] = 2;
6671       rmapa[cp] = globidx;
6672       cmapt[cp] = 2;
6673       cmapa[cp] = globidx;
6674       mptmp[cp] = PETSC_FALSE;
6675       cp++;
6676     } else {
6677       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6678       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6679       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6680       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6681       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6682       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6683       mp[cp]->product->api_user = product->api_user;
6684       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6685       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6686       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6687       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6688       rmapt[cp] = 1;
6689       cmapt[cp] = 2;
6690       cmapa[cp] = globidx;
6691       mptmp[cp] = PETSC_FALSE;
6692       cp++;
6693       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6694       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6695       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6696       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6697       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6698       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6699       mp[cp]->product->api_user = product->api_user;
6700       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6701       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6702       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6703       rmapt[cp] = 2;
6704       rmapa[cp] = p->garray;
6705       cmapt[cp] = 2;
6706       cmapa[cp] = globidx;
6707       mptmp[cp] = PETSC_FALSE;
6708       cp++;
6709     }
6710     break;
6711   case MATPRODUCT_PtAP:
6712     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6713     /* P is product->B */
6714     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6715     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6716     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6717     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6718     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6719     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6720     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6721     mp[cp]->product->api_user = product->api_user;
6722     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6723     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6724     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6725     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6726     rmapt[cp] = 2;
6727     rmapa[cp] = globidx;
6728     cmapt[cp] = 2;
6729     cmapa[cp] = globidx;
6730     mptmp[cp] = PETSC_FALSE;
6731     cp++;
6732     if (mmdata->P_oth) {
6733       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6734       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6735       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6736       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6737       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6738       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6739       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6740       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6741       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6742       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6743       mp[cp]->product->api_user = product->api_user;
6744       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6745       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6746       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6747       mptmp[cp] = PETSC_TRUE;
6748       cp++;
6749       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6750       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6751       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6752       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6753       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6754       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6755       mp[cp]->product->api_user = product->api_user;
6756       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6757       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6758       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6759       rmapt[cp] = 2;
6760       rmapa[cp] = globidx;
6761       cmapt[cp] = 2;
6762       cmapa[cp] = P_oth_idx;
6763       mptmp[cp] = PETSC_FALSE;
6764       cp++;
6765     }
6766     break;
6767   default:
6768     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6769   }
6770   /* sanity check */
6771   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6772 
6773   ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
6774   for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
6775   ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
6776   for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
6777   mmdata->cp = cp;
6778   C->product->data       = mmdata;
6779   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6780   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6781 
6782   /* memory type */
6783   mmdata->mtype = PETSC_MEMTYPE_HOST;
6784   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6785   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6786   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6787   // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6788   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6789 
6790   /* prepare coo coordinates for values insertion */
6791   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6792     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6793     if (mptmp[cp]) continue;
6794     if (rmapt[cp] == 2 && hasoffproc) {
6795       const PetscInt *rmap = rmapa[cp];
6796       const PetscInt mr = mp[cp]->rmap->n;
6797       const PetscInt rs = C->rmap->rstart;
6798       const PetscInt re = C->rmap->rend;
6799       const PetscInt *ii  = mm->i;
6800       for (i = 0; i < mr; i++) {
6801         const PetscInt gr = rmap[i];
6802         const PetscInt nz = ii[i+1] - ii[i];
6803         if (gr < rs || gr >= re) ncoo_o += nz;
6804         else ncoo_oown += nz;
6805       }
6806     } else ncoo_d += mm->nz;
6807   }
6808   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
6809   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6810   if (hasoffproc) { /* handle offproc values insertion */
6811     PetscSF  msf;
6812     PetscInt ncoo2,*coo_i2,*coo_j2;
6813 
6814     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6815     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6816     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
6817     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6818       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6819       PetscInt   *idxoff = mmdata->off[cp];
6820       PetscInt   *idxown = mmdata->own[cp];
6821       if (!mptmp[cp] && rmapt[cp] == 2) {
6822         const PetscInt *rmap = rmapa[cp];
6823         const PetscInt *cmap = cmapa[cp];
6824         const PetscInt *ii  = mm->i;
6825         PetscInt       *coi = coo_i + ncoo_o;
6826         PetscInt       *coj = coo_j + ncoo_o;
6827         const PetscInt mr = mp[cp]->rmap->n;
6828         const PetscInt rs = C->rmap->rstart;
6829         const PetscInt re = C->rmap->rend;
6830         const PetscInt cs = C->cmap->rstart;
6831         for (i = 0; i < mr; i++) {
6832           const PetscInt *jj = mm->j + ii[i];
6833           const PetscInt gr  = rmap[i];
6834           const PetscInt nz  = ii[i+1] - ii[i];
6835           if (gr < rs || gr >= re) {
6836             for (j = ii[i]; j < ii[i+1]; j++) {
6837               *coi++ = gr;
6838               *idxoff++ = j;
6839             }
6840             if (!cmapt[cp]) { /* already global */
6841               for (j = 0; j < nz; j++) *coj++ = jj[j];
6842             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6843               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6844             } else { /* offdiag */
6845               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6846             }
6847             ncoo_o += nz;
6848           } else {
6849             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6850           }
6851         }
6852       }
6853       mmdata->off[cp + 1] = idxoff;
6854       mmdata->own[cp + 1] = idxown;
6855     }
6856 
6857     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6858     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6859     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6860     ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
6861     ncoo = ncoo_d + ncoo_oown + ncoo2;
6862     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6863     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6864     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6865     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6866     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6867     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6868     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6869     coo_i = coo_i2;
6870     coo_j = coo_j2;
6871   } else { /* no offproc values insertion */
6872     ncoo = ncoo_d;
6873     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6874 
6875     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6876     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6877     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6878   }
6879   mmdata->hasoffproc = hasoffproc;
6880 
6881   /* on-process indices */
6882   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6883     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6884     PetscInt       *coi = coo_i + ncoo_d;
6885     PetscInt       *coj = coo_j + ncoo_d;
6886     const PetscInt *jj  = mm->j;
6887     const PetscInt *ii  = mm->i;
6888     const PetscInt *cmap = cmapa[cp];
6889     const PetscInt *rmap = rmapa[cp];
6890     const PetscInt mr = mp[cp]->rmap->n;
6891     const PetscInt rs = C->rmap->rstart;
6892     const PetscInt re = C->rmap->rend;
6893     const PetscInt cs = C->cmap->rstart;
6894 
6895     if (mptmp[cp]) continue;
6896     if (rmapt[cp] == 1) {
6897       for (i = 0; i < mr; i++) {
6898         const PetscInt gr = i + rs;
6899         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6900       }
6901       /* columns coo */
6902       if (!cmapt[cp]) {
6903         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6904       } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6905         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
6906       } else { /* offdiag */
6907         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6908       }
6909       ncoo_d += mm->nz;
6910     } else if (rmapt[cp] == 2) {
6911       for (i = 0; i < mr; i++) {
6912         const PetscInt *jj = mm->j + ii[i];
6913         const PetscInt gr  = rmap[i];
6914         const PetscInt nz  = ii[i+1] - ii[i];
6915         if (gr >= rs && gr < re) {
6916           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6917           if (!cmapt[cp]) { /* already global */
6918             for (j = 0; j < nz; j++) *coj++ = jj[j];
6919           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6920             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6921           } else { /* offdiag */
6922             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6923           }
6924           ncoo_d += nz;
6925         }
6926       }
6927     }
6928   }
6929   if (glob) {
6930     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6931   }
6932   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6933   if (P_oth_l2g) {
6934     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6935   }
6936   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6937   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6938 
6939   /* preallocate with COO data */
6940   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6941   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6942   PetscFunctionReturn(0);
6943 }
6944 
6945 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6946 {
6947   Mat_Product    *product = mat->product;
6948   PetscErrorCode ierr;
6949 #if defined(PETSC_HAVE_DEVICE)
6950   PetscBool      match = PETSC_FALSE;
6951   PetscBool      usecpu = PETSC_FALSE;
6952 #else
6953   PetscBool      match = PETSC_TRUE;
6954 #endif
6955 
6956   PetscFunctionBegin;
6957   MatCheckProduct(mat,1);
6958 #if defined(PETSC_HAVE_DEVICE)
6959   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6960     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6961   }
6962   if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */
6963     switch (product->type) {
6964     case MATPRODUCT_AB:
6965       if (product->api_user) {
6966         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6967         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6968         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6969       } else {
6970         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6971         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6972         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6973       }
6974       break;
6975     case MATPRODUCT_AtB:
6976       if (product->api_user) {
6977         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
6978         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6979         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6980       } else {
6981         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
6982         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6983         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6984       }
6985       break;
6986     case MATPRODUCT_PtAP:
6987       if (product->api_user) {
6988         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6989         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6990         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6991       } else {
6992         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6993         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6994         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6995       }
6996       break;
6997     default:
6998       break;
6999     }
7000     match = (PetscBool)!usecpu;
7001   }
7002 #endif
7003   if (match) {
7004     switch (product->type) {
7005     case MATPRODUCT_AB:
7006     case MATPRODUCT_AtB:
7007     case MATPRODUCT_PtAP:
7008       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7009       break;
7010     default:
7011       break;
7012     }
7013   }
7014   /* fallback to MPIAIJ ops */
7015   if (!mat->ops->productsymbolic) {
7016     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7017   }
7018   PetscFunctionReturn(0);
7019 }
7020