xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50 
51   PetscFunctionBegin;
52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
53   A->boundtocpu = flg;
54 #endif
55   if (a->A) {
56     CHKERRQ(MatBindToCPU(a->A,flg));
57   }
58   if (a->B) {
59     CHKERRQ(MatBindToCPU(a->B,flg));
60   }
61 
62   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
63    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
64    * to differ from the parent matrix. */
65   if (a->lvec) {
66     CHKERRQ(VecBindToCPU(a->lvec,flg));
67   }
68   if (a->diag) {
69     CHKERRQ(VecBindToCPU(a->diag,flg));
70   }
71 
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
76 {
77   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
78 
79   PetscFunctionBegin;
80   if (mat->A) {
81     CHKERRQ(MatSetBlockSizes(mat->A,rbs,cbs));
82     CHKERRQ(MatSetBlockSizes(mat->B,rbs,1));
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
88 {
89   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
90   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
91   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
92   const PetscInt  *ia,*ib;
93   const MatScalar *aa,*bb,*aav,*bav;
94   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
95   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
96 
97   PetscFunctionBegin;
98   *keptrows = NULL;
99 
100   ia   = a->i;
101   ib   = b->i;
102   CHKERRQ(MatSeqAIJGetArrayRead(mat->A,&aav));
103   CHKERRQ(MatSeqAIJGetArrayRead(mat->B,&bav));
104   for (i=0; i<m; i++) {
105     na = ia[i+1] - ia[i];
106     nb = ib[i+1] - ib[i];
107     if (!na && !nb) {
108       cnt++;
109       goto ok1;
110     }
111     aa = aav + ia[i];
112     for (j=0; j<na; j++) {
113       if (aa[j] != 0.0) goto ok1;
114     }
115     bb = bav + ib[i];
116     for (j=0; j <nb; j++) {
117       if (bb[j] != 0.0) goto ok1;
118     }
119     cnt++;
120 ok1:;
121   }
122   CHKERRMPI(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
123   if (!n0rows) {
124     CHKERRQ(MatSeqAIJRestoreArrayRead(mat->A,&aav));
125     CHKERRQ(MatSeqAIJRestoreArrayRead(mat->B,&bav));
126     PetscFunctionReturn(0);
127   }
128   CHKERRQ(PetscMalloc1(M->rmap->n-cnt,&rows));
129   cnt  = 0;
130   for (i=0; i<m; i++) {
131     na = ia[i+1] - ia[i];
132     nb = ib[i+1] - ib[i];
133     if (!na && !nb) continue;
134     aa = aav + ia[i];
135     for (j=0; j<na;j++) {
136       if (aa[j] != 0.0) {
137         rows[cnt++] = rstart + i;
138         goto ok2;
139       }
140     }
141     bb = bav + ib[i];
142     for (j=0; j<nb; j++) {
143       if (bb[j] != 0.0) {
144         rows[cnt++] = rstart + i;
145         goto ok2;
146       }
147     }
148 ok2:;
149   }
150   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
151   CHKERRQ(MatSeqAIJRestoreArrayRead(mat->A,&aav));
152   CHKERRQ(MatSeqAIJRestoreArrayRead(mat->B,&bav));
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
157 {
158   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
159   PetscBool         cong;
160 
161   PetscFunctionBegin;
162   CHKERRQ(MatHasCongruentLayouts(Y,&cong));
163   if (Y->assembled && cong) {
164     CHKERRQ(MatDiagonalSet(aij->A,D,is));
165   } else {
166     CHKERRQ(MatDiagonalSet_Default(Y,D,is));
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
172 {
173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
174   PetscInt       i,rstart,nrows,*rows;
175 
176   PetscFunctionBegin;
177   *zrows = NULL;
178   CHKERRQ(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
179   CHKERRQ(MatGetOwnershipRange(M,&rstart,NULL));
180   for (i=0; i<nrows; i++) rows[i] += rstart;
181   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
182   PetscFunctionReturn(0);
183 }
184 
/*
  MatGetColumnReductions_MPIAIJ - computes a per-column reduction over all rows.

  Input:  A    - the matrix
          type - NORM_1, NORM_2, NORM_INFINITY, or one of the REDUCTION_{SUM,MEAN}_{REAL,IMAGINARY}PART values
  Output: reductions - array of global length A->cmap->N; identical on every process

  Each process accumulates the contributions of its diagonal (A) and off-diagonal (B)
  blocks into a work array indexed by global column, then combines the partial results
  with an Allreduce (MAX for the infinity norm, SUM otherwise).
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray; /* garray maps local B column -> global column */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  CHKERRQ(MatGetSize(A,&m,&n));
  CHKERRQ(PetscCalloc1(n,&work));
  /* get/restore the value arrays only so that an up-to-date host copy of the values
     exists before a_aij->a / b_aij->a are accessed directly below */
  CHKERRQ(MatSeqAIJGetArrayRead(aij->A,&dummy));
  CHKERRQ(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  CHKERRQ(MatSeqAIJGetArrayRead(aij->B,&dummy));
  CHKERRQ(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* note |a*a| == |a|^2 holds for complex scalars as well */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-process partial results */
  if (type == NORM_INFINITY) {
    CHKERRMPI(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    CHKERRMPI(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  CHKERRQ(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* mean over the global number of rows m */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
250 
251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
252 {
253   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
254   IS              sis,gis;
255   const PetscInt  *isis,*igis;
256   PetscInt        n,*iis,nsis,ngis,rstart,i;
257 
258   PetscFunctionBegin;
259   CHKERRQ(MatFindOffBlockDiagonalEntries(a->A,&sis));
260   CHKERRQ(MatFindNonzeroRows(a->B,&gis));
261   CHKERRQ(ISGetSize(gis,&ngis));
262   CHKERRQ(ISGetSize(sis,&nsis));
263   CHKERRQ(ISGetIndices(sis,&isis));
264   CHKERRQ(ISGetIndices(gis,&igis));
265 
266   CHKERRQ(PetscMalloc1(ngis+nsis,&iis));
267   CHKERRQ(PetscArraycpy(iis,igis,ngis));
268   CHKERRQ(PetscArraycpy(iis+ngis,isis,nsis));
269   n    = ngis + nsis;
270   CHKERRQ(PetscSortRemoveDupsInt(&n,iis));
271   CHKERRQ(MatGetOwnershipRange(A,&rstart,NULL));
272   for (i=0; i<n; i++) iis[i] += rstart;
273   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
274 
275   CHKERRQ(ISRestoreIndices(sis,&isis));
276   CHKERRQ(ISRestoreIndices(gis,&igis));
277   CHKERRQ(ISDestroy(&sis));
278   CHKERRQ(ISDestroy(&gis));
279   PetscFunctionReturn(0);
280 }
281 
282 /*
283   Local utility routine that creates a mapping from the global column
284 number to the local number in the off-diagonal part of the local
285 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order-N integer array) but is fast to access.
288 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i; /* number of local columns of the off-diagonal block */

  PetscFunctionBegin;
  /* garray (global column of each local B column) must exist once B has columns */
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table from global column+1 to local column+1
     (the +1 shift lets 0 mean "not present") */
  CHKERRQ(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    CHKERRQ(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* non-scalable variant: dense array of global column length; entry 0 means "not present" */
  CHKERRQ(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  CHKERRQ(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
308 
/*
  MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) - inserts (INSERT_VALUES)
  or adds (ADD_VALUES) a single value into the diagonal block A at local (row,col).
  orow/ocol are the original global indices, used only in error messages.

  Relies on caller-scope locals set up per row by MatSetValues_MPIAIJ: rp1/ap1 (column
  index/value arrays of the row), nrow1, rmax1, low1/high1/lastcol1 (binary-search
  state that is reused across consecutive column insertions), nonew, and the
  A/a/ai/aj/aimax/ailen/am variables used when the row must be reallocated.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* not sure whether counting this flop helps or hurts performance */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      CHKERRQ(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      CHKERRQ(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
345 
/*
  MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) - same as the A variant
  above but for the off-diagonal block B, using the rp2/ap2/nrow2/rmax2/low2/high2/
  lastcol2 caller-scope search state and the B/b/bi/bj/bimax/bilen/bm variables.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    CHKERRQ(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    CHKERRQ(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
381 
/*
  MatSetValuesRow_MPIAIJ - overwrites all stored values of one locally owned row.

  Input: A   - the matrix
         row - the GLOBAL row number
         v   - new values, ordered by increasing global column number: first the
               off-diagonal entries left of the diagonal block, then the diagonal
               block entries, then the remaining off-diagonal entries.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  CHKERRQ(MatGetOwnershipRange(A,&diag,NULL));
  row  = row - diag; /* global -> local row number */
  /* l = number of off-diagonal entries with global column < ownership start
     (assumes B's columns are sorted by global column — garray is increasing) */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    CHKERRQ(MatSeqAIJGetArray(mat->B,&ba));
    CHKERRQ(PetscArraycpy(ba+b->i[row],v,l));
    CHKERRQ(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    CHKERRQ(MatSeqAIJGetArray(mat->A,&aa));
    CHKERRQ(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    CHKERRQ(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    CHKERRQ(MatSeqAIJGetArray(mat->B,&ba));
    CHKERRQ(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    CHKERRQ(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
419 
420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
421 {
422   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
423   PetscScalar    value = 0.0;
424   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
425   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
426   PetscBool      roworiented = aij->roworiented;
427 
428   /* Some Variables required in the macro */
429   Mat        A                    = aij->A;
430   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
431   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
432   PetscBool  ignorezeroentries    = a->ignorezeroentries;
433   Mat        B                    = aij->B;
434   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
435   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
436   MatScalar  *aa,*ba;
437   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
438   PetscInt   nonew;
439   MatScalar  *ap1,*ap2;
440 
441   PetscFunctionBegin;
442   CHKERRQ(MatSeqAIJGetArray(A,&aa));
443   CHKERRQ(MatSeqAIJGetArray(B,&ba));
444   for (i=0; i<m; i++) {
445     if (im[i] < 0) continue;
446     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
447     if (im[i] >= rstart && im[i] < rend) {
448       row      = im[i] - rstart;
449       lastcol1 = -1;
450       rp1      = aj + ai[row];
451       ap1      = aa + ai[row];
452       rmax1    = aimax[row];
453       nrow1    = ailen[row];
454       low1     = 0;
455       high1    = nrow1;
456       lastcol2 = -1;
457       rp2      = bj + bi[row];
458       ap2      = ba + bi[row];
459       rmax2    = bimax[row];
460       nrow2    = bilen[row];
461       low2     = 0;
462       high2    = nrow2;
463 
464       for (j=0; j<n; j++) {
465         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
466         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
467         if (in[j] >= cstart && in[j] < cend) {
468           col   = in[j] - cstart;
469           nonew = a->nonew;
470           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
471         } else if (in[j] < 0) continue;
472         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
473         else {
474           if (mat->was_assembled) {
475             if (!aij->colmap) {
476               CHKERRQ(MatCreateColmap_MPIAIJ_Private(mat));
477             }
478 #if defined(PETSC_USE_CTABLE)
479             CHKERRQ(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
480             col--;
481 #else
482             col = aij->colmap[in[j]] - 1;
483 #endif
484             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
485               CHKERRQ(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
486               col  =  in[j];
487               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488               B        = aij->B;
489               b        = (Mat_SeqAIJ*)B->data;
490               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491               rp2      = bj + bi[row];
492               ap2      = ba + bi[row];
493               rmax2    = bimax[row];
494               nrow2    = bilen[row];
495               low2     = 0;
496               high2    = nrow2;
497               bm       = aij->B->rmap->n;
498               ba       = b->a;
499             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
500               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
501                 CHKERRQ(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
502               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
503             }
504           } else col = in[j];
505           nonew = b->nonew;
506           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
507         }
508       }
509     } else {
510       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           CHKERRQ(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
515         } else {
516           CHKERRQ(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
517         }
518       }
519     }
520   }
521   CHKERRQ(MatSeqAIJRestoreArray(A,&aa));
522   CHKERRQ(MatSeqAIJRestoreArray(B,&ba));
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
528     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
530 */
531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
532 {
533   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
534   Mat            A           = aij->A; /* diagonal part of the matrix */
535   Mat            B           = aij->B; /* offdiagonal part of the matrix */
536   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
537   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
538   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
539   PetscInt       *ailen      = a->ilen,*aj = a->j;
540   PetscInt       *bilen      = b->ilen,*bj = b->j;
541   PetscInt       am          = aij->A->rmap->n,j;
542   PetscInt       diag_so_far = 0,dnz;
543   PetscInt       offd_so_far = 0,onz;
544 
545   PetscFunctionBegin;
546   /* Iterate over all rows of the matrix */
547   for (j=0; j<am; j++) {
548     dnz = onz = 0;
549     /*  Iterate over all non-zero columns of the current row */
550     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
551       /* If column is in the diagonal */
552       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553         aj[diag_so_far++] = mat_j[col] - cstart;
554         dnz++;
555       } else { /* off-diagonal entries */
556         bj[offd_so_far++] = mat_j[col];
557         onz++;
558       }
559     }
560     ailen[j] = dnz;
561     bilen[j] = onz;
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
570     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572 */
573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
574 {
575   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
576   Mat            A      = aij->A; /* diagonal part of the matrix */
577   Mat            B      = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
579   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
580   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
581   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
582   PetscInt       *ailen = a->ilen,*aj = a->j;
583   PetscInt       *bilen = b->ilen,*bj = b->j;
584   PetscInt       am     = aij->A->rmap->n,j;
585   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
587   PetscScalar    *aa = a->a,*ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j=0; j<am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd = full_offd_i[j];
594     rowstart_diag = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag+dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd+onz_row] = mat_j[col];
604         ba[rowstart_offd+onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/*
  MatGetValues_MPIAIJ - retrieves an m-by-n block of values into v (row-major).

  Only locally owned rows are supported; negative row/column indices are skipped
  (leaving the corresponding v entries untouched). Columns not stored in the local
  off-diagonal block return 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - cstart;
          CHKERRQ(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* off-diagonal block: translate global column to local B column via colmap */
          if (!aij->colmap) {
            CHKERRQ(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          CHKERRQ(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (or a stale colmap entry) means the column is not stored locally */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            CHKERRQ(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
652 
653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
654 {
655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   CHKERRQ(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
662   CHKERRQ(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
663   CHKERRQ(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
664   PetscFunctionReturn(0);
665 }
666 
/*
  MatAssemblyEnd_MPIAIJ - receives stashed off-process entries, inserts them, and
  completes assembly of the diagonal (A) and off-diagonal (B) blocks. Collective.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash: each message is a batch of (row,col,val) triples */
    while (1) {
      CHKERRQ(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        CHKERRQ(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    CHKERRQ(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    CHKERRQ(MatBindToCPU(aij->A,PETSC_TRUE));
    CHKERRQ(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  CHKERRQ(MatAssemblyBegin(aij->A,mode));
  CHKERRQ(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    CHKERRMPI(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      CHKERRQ(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter used for matrix-vector products */
    CHKERRQ(MatSetUpMultiply_MPIAIJ(mat));
  }
  CHKERRQ(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  CHKERRQ(MatAssemblyBegin(aij->B,mode));
  CHKERRQ(MatAssemblyEnd(aij->B,mode));

  /* cached row workspace and diagonal are invalidated by assembly */
  CHKERRQ(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  CHKERRQ(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    CHKERRMPI(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
747 
748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
749 {
750   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
751 
752   PetscFunctionBegin;
753   CHKERRQ(MatZeroEntries(l->A));
754   CHKERRQ(MatZeroEntries(l->B));
755   PetscFunctionReturn(0);
756 }
757 
/*
  Zero the locally owned portion of the given global rows of A, optionally placing
  `diag` on the diagonal of each zeroed row.  When both x and b are provided the
  right-hand side is fixed up as b[r] = diag*x[r] for every zeroed local row r,
  which requires matching row/column layouts.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;            /* nonzero states of the two blocks before zeroing */
  PetscInt        *lrows;             /* local indices of the owned rows to zero */
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  CHKERRQ(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  CHKERRQ(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    CHKERRQ(VecGetArrayRead(x, &xx));
    CHKERRQ(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    CHKERRQ(VecRestoreArrayRead(x, &xx));
    CHKERRQ(VecRestoreArray(b, &bb));
  }

  /* remember the per-block nonzero states so a pattern change can be detected below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: each diagonal entry lives in the diagonal block A */
    CHKERRQ(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    CHKERRQ(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;   /* saved "nonew" settings, restored after insertion */
    PetscBool  nnzA, nnzB;   /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      CHKERRQ(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      /* temporarily allow new nonzero locations so the diagonal entry can be inserted */
      aijA->nonew = 0;
    }
    if (!nnzB) {
      CHKERRQ(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    CHKERRQ(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    CHKERRQ(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal values one at a time, skipping rows past the column range */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      CHKERRQ(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: simply zero the rows of both blocks */
    CHKERRQ(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    CHKERRQ(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  CHKERRQ(PetscFree(lrows));
  CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  CHKERRMPI(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;  /* bump the global state if any rank changed its pattern */
  PetscFunctionReturn(0);
}
831 
/*
  Zero the given global rows AND the corresponding columns of A, optionally placing
  `diag` on the diagonal (delegated to the diagonal block) and adjusting the
  right-hand side b for the eliminated columns.  Off-process column elimination is
  done by scattering a 0/1 mask of zeroed rows to each rank's ghost columns.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;          /* number of locally owned rows; reused as a row length below */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;             /* global mask of zeroed rows, and its ghosted (local) form */
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  CHKERRQ(PetscMalloc1(n, &lrows));
  /* -1 marks "not requested"; any root that receives a leaf becomes >= 0 after the reduce */
  for (r = 0; r < n; ++r) lrows[r] = -1;
  CHKERRQ(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      CHKERRQ(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  CHKERRQ(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  CHKERRQ(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  CHKERRQ(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  CHKERRQ(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  CHKERRQ(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  CHKERRQ(MatCreateVecs(A,&xmask,NULL));
  CHKERRQ(VecDuplicate(l->lvec,&lmask));
  CHKERRQ(VecGetArray(xmask,&bb));
  /* mark zeroed rows with 1 in the global mask vector */
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  CHKERRQ(VecRestoreArray(xmask,&bb));
  /* scatter the mask to the ghost (off-process column) layout */
  CHKERRQ(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    CHKERRQ(MatHasCongruentLayouts(A,&cong));
    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* ghost values of x are needed to update b for the eliminated off-process columns */
    CHKERRQ(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    CHKERRQ(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    CHKERRQ(VecGetArrayRead(l->lvec,&xx));
    CHKERRQ(VecGetArray(b,&bb));
  }
  CHKERRQ(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  CHKERRQ(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    CHKERRQ(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: iterate only over rows with nonzeros, ridx maps back to true row */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was eliminated: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    CHKERRQ(VecRestoreArray(b,&bb));
    CHKERRQ(VecRestoreArrayRead(l->lvec,&xx));
  }
  CHKERRQ(MatSeqAIJRestoreArray(l->B,&aij_a));
  CHKERRQ(VecRestoreArray(lmask,&mask));
  CHKERRQ(VecDestroy(&lmask));
  CHKERRQ(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    CHKERRMPI(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
950 
951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscInt       nt;
955   VecScatter     Mvctx = a->Mvctx;
956 
957   PetscFunctionBegin;
958   CHKERRQ(VecGetLocalSize(xx,&nt));
959   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
960   CHKERRQ(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
961   CHKERRQ((*a->A->ops->mult)(a->A,xx,yy));
962   CHKERRQ(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
963   CHKERRQ((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970 
971   PetscFunctionBegin;
972   CHKERRQ(MatMultDiagonalBlock(a->A,bb,xx));
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   VecScatter     Mvctx = a->Mvctx;
980 
981   PetscFunctionBegin;
982   CHKERRQ(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
983   CHKERRQ((*a->A->ops->multadd)(a->A,xx,yy,zz));
984   CHKERRQ(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
985   CHKERRQ((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992 
993   PetscFunctionBegin;
994   /* do nondiagonal part */
995   CHKERRQ((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
996   /* do local part */
997   CHKERRQ((*a->A->ops->multtranspose)(a->A,xx,yy));
998   /* add partial results together */
999   CHKERRQ(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1000   CHKERRQ(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1001   PetscFunctionReturn(0);
1002 }
1003 
/*
  Test whether Bmat equals the transpose of Amat to within tol.
  A cheap local test of the diagonal blocks is performed first; only if it passes
  on every rank is the expensive off-diagonal comparison (which extracts
  submatrices) carried out.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;        /* locally owned rows, and all other (global) indices */
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;              /* local (per-rank) result of the diagonal-block test */
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  CHKERRQ(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* all ranks must agree before continuing */
  CHKERRMPI(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  CHKERRQ(PetscObjectGetComm((PetscObject)Amat,&comm));
  CHKERRMPI(MPI_Comm_size(comm,&size));
  /* uniprocessor case: there is no off-diagonal block, we are done */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  CHKERRQ(MatGetSize(Amat,&M,&N));
  CHKERRQ(MatGetOwnershipRange(Amat,&first,&last));
  /* NOTE(review): the allocation is sized with N while the second fill loop runs to M;
     this assumes M == N (square matrix) — confirm this precondition with the callers */
  CHKERRQ(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  CHKERRQ(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  CHKERRQ(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* compare A(Me,Notme) against B(Notme,Me): these hold the corresponding off-diagonal entries */
  CHKERRQ(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  CHKERRQ(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  CHKERRQ(MatIsTranspose(Aoff,Boff,tol,f));
  CHKERRQ(MatDestroyMatrices(1,&Aoffs));
  CHKERRQ(MatDestroyMatrices(1,&Boffs));
  CHKERRQ(ISDestroy(&Me));
  CHKERRQ(ISDestroy(&Notme));
  CHKERRQ(PetscFree(notme));
  PetscFunctionReturn(0);
}
1044 
1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046 {
1047   PetscFunctionBegin;
1048   CHKERRQ(MatIsTranspose_MPIAIJ(A,A,tol,f));
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   CHKERRQ((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1059   /* do local part */
1060   CHKERRQ((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1061   /* add partial results together */
1062   CHKERRQ(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1063   CHKERRQ(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1078   CHKERRQ(MatGetDiagonal(a->A,v));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085 
1086   PetscFunctionBegin;
1087   CHKERRQ(MatScale(a->A,aa));
1088   CHKERRQ(MatScale(a->B,aa));
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1093 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1094 {
1095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1096 
1097   PetscFunctionBegin;
1098   CHKERRQ(PetscSFDestroy(&aij->coo_sf));
1099   CHKERRQ(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
1100   CHKERRQ(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
1101   CHKERRQ(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
1102   CHKERRQ(PetscFree2(aij->sendbuf,aij->recvbuf));
1103   CHKERRQ(PetscFree(aij->Cperm1));
1104   PetscFunctionReturn(0);
1105 }
1106 
/*
  Destroy the parallel AIJ matrix: free the sequential blocks, the column map,
  the communication machinery, the COO data, and finally detach every composed
  method so no stale function pointers remain on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  CHKERRQ(MatStashDestroy_Private(&mat->stash));
  CHKERRQ(VecDestroy(&aij->diag));
  /* the two sequential blocks */
  CHKERRQ(MatDestroy(&aij->A));
  CHKERRQ(MatDestroy(&aij->B));
  /* global-to-local column map (hash table or plain array depending on configuration) */
#if defined(PETSC_USE_CTABLE)
  CHKERRQ(PetscTableDestroy(&aij->colmap));
#else
  CHKERRQ(PetscFree(aij->colmap));
#endif
  CHKERRQ(PetscFree(aij->garray));
  /* ghost vector and scatter used by the mat-vec product */
  CHKERRQ(VecDestroy(&aij->lvec));
  CHKERRQ(VecScatterDestroy(&aij->Mvctx));
  CHKERRQ(PetscFree2(aij->rowvalues,aij->rowindices));
  CHKERRQ(PetscFree(aij->ld));

  /* Free COO */
  CHKERRQ(MatResetPreallocationCOO_MPIAIJ(mat));

  /* must come after everything above that reads aij (== mat->data) */
  CHKERRQ(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  CHKERRQ(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  /* detach the composed methods registered at creation/convert time */
  CHKERRQ(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here (harmless, but redundant) */
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}
1181 
1182 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1183 {
1184   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1185   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1186   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1187   const PetscInt    *garray = aij->garray;
1188   const PetscScalar *aa,*ba;
1189   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1190   PetscInt          *rowlens;
1191   PetscInt          *colidxs;
1192   PetscScalar       *matvals;
1193 
1194   PetscFunctionBegin;
1195   CHKERRQ(PetscViewerSetUp(viewer));
1196 
1197   M  = mat->rmap->N;
1198   N  = mat->cmap->N;
1199   m  = mat->rmap->n;
1200   rs = mat->rmap->rstart;
1201   cs = mat->cmap->rstart;
1202   nz = A->nz + B->nz;
1203 
1204   /* write matrix header */
1205   header[0] = MAT_FILE_CLASSID;
1206   header[1] = M; header[2] = N; header[3] = nz;
1207   CHKERRMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1208   CHKERRQ(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1209 
1210   /* fill in and store row lengths  */
1211   CHKERRQ(PetscMalloc1(m,&rowlens));
1212   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1213   CHKERRQ(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1214   CHKERRQ(PetscFree(rowlens));
1215 
1216   /* fill in and store column indices */
1217   CHKERRQ(PetscMalloc1(nz,&colidxs));
1218   for (cnt=0, i=0; i<m; i++) {
1219     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1220       if (garray[B->j[jb]] > cs) break;
1221       colidxs[cnt++] = garray[B->j[jb]];
1222     }
1223     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1224       colidxs[cnt++] = A->j[ja] + cs;
1225     for (; jb<B->i[i+1]; jb++)
1226       colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1229   CHKERRQ(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1230   CHKERRQ(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   CHKERRQ(MatSeqAIJGetArrayRead(aij->A,&aa));
1234   CHKERRQ(MatSeqAIJGetArrayRead(aij->B,&ba));
1235   CHKERRQ(PetscMalloc1(nz,&matvals));
1236   for (cnt=0, i=0; i<m; i++) {
1237     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1242       matvals[cnt++] = aa[ja];
1243     for (; jb<B->i[i+1]; jb++)
1244       matvals[cnt++] = ba[jb];
1245   }
1246   CHKERRQ(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1247   CHKERRQ(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1248   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1249   CHKERRQ(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1250   CHKERRQ(PetscFree(matvals));
1251 
1252   /* write block size option to the viewer's .info file */
1253   CHKERRQ(MatView_Binary_BlockSizes(mat,viewer));
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #include <petscdraw.h>
/*
  View a parallel AIJ matrix on ASCII, draw, binary, or socket viewers.
  The load-balance / info ASCII formats and the binary viewer are handled
  directly; every other case falls through to gathering the whole matrix on
  rank 0 and viewing it as a sequential matrix.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    CHKERRQ(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      CHKERRQ(PetscMalloc1(size,&nz));
      CHKERRMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      CHKERRQ(PetscFree(nz));
      navg = navg/size;
      CHKERRQ(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    CHKERRQ(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local row / nonzero / memory statistics, printed synchronized */
      MatInfo   info;
      PetscInt *inodes=NULL;

      CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      CHKERRQ(MatGetInfo(mat,MAT_LOCAL,&info));
      CHKERRQ(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      CHKERRQ(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      CHKERRQ(MatGetInfo(aij->A,MAT_LOCAL,&info));
      CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      CHKERRQ(MatGetInfo(aij->B,MAT_LOCAL,&info));
      CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      CHKERRQ(PetscViewerFlush(viewer));
      CHKERRQ(PetscViewerASCIIPopSynchronized(viewer));
      CHKERRQ(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      CHKERRQ(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      CHKERRQ(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        CHKERRQ(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        CHKERRQ(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      CHKERRQ(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      CHKERRQ(MatView(aij->A,viewer));
    } else {
      CHKERRQ(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  /* NOTE(review): this branch is unreachable — ASCII viewers are consumed by the
     first branch of this if/else chain, so iascii is always false here; confirm
     whether a size==1 ASCII shortcut was intended inside the first branch instead */
  } else if (iascii && size == 1) {
    CHKERRQ(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    CHKERRQ(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    CHKERRQ(PetscViewerDrawGetDraw(viewer,0,&draw));
    CHKERRQ(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    CHKERRQ(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    CHKERRQ(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    CHKERRQ(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       CHKERRQ(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    CHKERRQ(MatDestroySubMatrices(1,&AA));
*/
    CHKERRQ(ISDestroy(&iscol));
    CHKERRQ(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        CHKERRQ(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      CHKERRQ(MatView_SeqAIJ(Av,sviewer));
    }
    CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    CHKERRQ(PetscViewerFlush(viewer));
    CHKERRQ(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1387 {
1388   PetscBool      iascii,isdraw,issocket,isbinary;
1389 
1390   PetscFunctionBegin;
1391   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1392   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1393   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1394   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1395   if (iascii || isdraw || isbinary || issocket) {
1396     CHKERRQ(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for the MPIAIJ format.

   Only "local" variants are supported in parallel: each process sweeps over its
   diagonal block mat->A, while the coupling through the off-diagonal block
   mat->B is folded into the right-hand side (bb1 = bb - B*x) once per outer
   iteration.  True parallel SOR (a global ordering across processes) is not
   implemented and raises PETSC_ERR_SUP.

   Input Parameters:
.  matin  - the MPIAIJ matrix
.  bb     - right-hand side
.  omega  - relaxation factor
.  flag   - MatSORType bit flags selecting the sweep variant
.  fshift - diagonal shift
.  its    - number of outer (parallel) iterations
.  lits   - number of local iterations passed to the sequential SOR

   Output Parameter:
.  xx - the solution iterate (also input when not SOR_ZERO_INITIAL_GUESS)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* work vector holding the modified rhs bb - B*x */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate directly to the diagonal block */
    CHKERRQ((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless this is a single iteration starting from a zero guess
     (then the first local sweep can use bb directly); Eisenstat always needs it */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    CHKERRQ(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no off-process values since x starts at zero */
      CHKERRQ((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather the off-process entries of x needed by the B block */
      CHKERRQ(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      CHKERRQ(VecScale(mat->lvec,-1.0));
      CHKERRQ((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      CHKERRQ((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      CHKERRQ((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      CHKERRQ(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      CHKERRQ(VecScale(mat->lvec,-1.0));
      CHKERRQ((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      CHKERRQ((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      CHKERRQ((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      CHKERRQ(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      CHKERRQ(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      CHKERRQ(VecScale(mat->lvec,-1.0));
      CHKERRQ((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      CHKERRQ((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat trick: backward sweep on the local block, then correct the rhs
       using the diagonal and B coupling, then a forward sweep into xx1 */
    CHKERRQ(VecDuplicate(bb,&xx1));
    CHKERRQ((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    CHKERRQ(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    CHKERRQ(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal of the global matrix */
      CHKERRQ(MatCreateVecs(matin,&mat->diag,NULL));
      CHKERRQ(MatGetDiagonal(matin,mat->diag));
    }
    CHKERRQ(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      CHKERRQ(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      CHKERRQ(VecPointwiseMult(bb1,mat->diag,xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    CHKERRQ(VecAYPX(bb1,(omega-2.0)/omega,bb));

    CHKERRQ(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    CHKERRQ((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    CHKERRQ(VecAXPY(xx,1.0,xx1));
    CHKERRQ(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  CHKERRQ(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected by the local sweep */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1499 
/*
   MatPermute_MPIAIJ - Produces B = P*A*Q for row permutation rowp and column
   permutation colp.

   Strategy: invert the (possibly non-local) row and column permutations with
   PetscSF reductions so each process learns the destination index of every row
   and column it owns, count the permuted nonzeros per destination row to
   preallocate, then insert the permuted entries with MatSetValues().

   Input Parameters:
.  A    - the MPIAIJ matrix
.  rowp - row permutation IS (rwant[i] = source row that lands in local row i)
.  colp - column permutation IS

   Output Parameter:
.  B - the permuted matrix
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;  /* NOTE(review): never set in this routine; the final ISDestroy(&colp) guard is currently dead code */
  PetscBool      done;

  PetscFunctionBegin;
  CHKERRQ(MatGetLocalSize(A,&m,&n));
  CHKERRQ(ISGetIndices(rowp,&rwant));
  CHKERRQ(ISGetIndices(colp,&cwant));
  CHKERRQ(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  CHKERRQ(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  CHKERRQ(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  /* reduce global row numbers onto the owners of the wanted rows: rdest[i] = new global row of local row i */
  CHKERRQ(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  CHKERRQ(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  CHKERRQ(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  CHKERRQ(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  CHKERRQ(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  CHKERRQ(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  CHKERRQ(PetscSFDestroy(&sf));

  CHKERRQ(ISRestoreIndices(rowp,&rwant));
  CHKERRQ(ISRestoreIndices(colp,&cwant));
  CHKERRQ(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  CHKERRQ(MatGetSize(aB,NULL,&ng));
  CHKERRQ(PetscMalloc1(ng,&gcdest));
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  CHKERRQ(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  CHKERRQ(PetscSFSetFromOptions(sf));
  /* broadcast destination column numbers from owners to the ghost columns */
  CHKERRQ(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  CHKERRQ(PetscSFDestroy(&sf));

  /* Count diagonal-block (dnnz) and off-diagonal-block (onnz) entries per local row,
     classified by the owner of the *destination* column */
  CHKERRQ(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  CHKERRQ(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  CHKERRQ(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    CHKERRQ(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      CHKERRQ(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      CHKERRQ(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* send the per-row counts to the processes that own the destination rows */
  CHKERRQ(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  CHKERRQ(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  CHKERRQ(PetscSFDestroy(&rowsf));

  CHKERRQ(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  CHKERRQ(MatSeqAIJGetArray(aA,&aa));
  CHKERRQ(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      CHKERRQ(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      CHKERRQ(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  CHKERRQ(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  CHKERRQ(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  CHKERRQ(MatSeqAIJRestoreArray(aA,&aa));
  CHKERRQ(MatSeqAIJRestoreArray(aB,&ba));
  CHKERRQ(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  CHKERRQ(PetscFree3(work,rdest,cdest));
  CHKERRQ(PetscFree(gcdest));
  if (parcolp) CHKERRQ(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1605 
1606 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1607 {
1608   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1609 
1610   PetscFunctionBegin;
1611   CHKERRQ(MatGetSize(aij->B,NULL,nghosts));
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscLogDouble isend[5],irecv[5];
1621 
1622   PetscFunctionBegin;
1623   info->block_size = 1.0;
1624   CHKERRQ(MatGetInfo(A,MAT_LOCAL,info));
1625 
1626   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1627   isend[3] = info->memory;  isend[4] = info->mallocs;
1628 
1629   CHKERRQ(MatGetInfo(B,MAT_LOCAL,info));
1630 
1631   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1632   isend[3] += info->memory;  isend[4] += info->mallocs;
1633   if (flag == MAT_LOCAL) {
1634     info->nz_used      = isend[0];
1635     info->nz_allocated = isend[1];
1636     info->nz_unneeded  = isend[2];
1637     info->memory       = isend[3];
1638     info->mallocs      = isend[4];
1639   } else if (flag == MAT_GLOBAL_MAX) {
1640     CHKERRMPI(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1641 
1642     info->nz_used      = irecv[0];
1643     info->nz_allocated = irecv[1];
1644     info->nz_unneeded  = irecv[2];
1645     info->memory       = irecv[3];
1646     info->mallocs      = irecv[4];
1647   } else if (flag == MAT_GLOBAL_SUM) {
1648     CHKERRMPI(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   }
1656   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1657   info->fill_ratio_needed = 0;
1658   info->factor_mallocs    = 0;
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1663 {
1664   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1665 
1666   PetscFunctionBegin;
1667   switch (op) {
1668   case MAT_NEW_NONZERO_LOCATIONS:
1669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1670   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1671   case MAT_KEEP_NONZERO_PATTERN:
1672   case MAT_NEW_NONZERO_LOCATION_ERR:
1673   case MAT_USE_INODES:
1674   case MAT_IGNORE_ZERO_ENTRIES:
1675   case MAT_FORM_EXPLICIT_TRANSPOSE:
1676     MatCheckPreallocated(A,1);
1677     CHKERRQ(MatSetOption(a->A,op,flg));
1678     CHKERRQ(MatSetOption(a->B,op,flg));
1679     break;
1680   case MAT_ROW_ORIENTED:
1681     MatCheckPreallocated(A,1);
1682     a->roworiented = flg;
1683 
1684     CHKERRQ(MatSetOption(a->A,op,flg));
1685     CHKERRQ(MatSetOption(a->B,op,flg));
1686     break;
1687   case MAT_FORCE_DIAGONAL_ENTRIES:
1688   case MAT_SORTED_FULL:
1689     CHKERRQ(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1690     break;
1691   case MAT_IGNORE_OFF_PROC_ENTRIES:
1692     a->donotstash = flg;
1693     break;
1694   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1695   case MAT_SPD:
1696   case MAT_SYMMETRIC:
1697   case MAT_STRUCTURALLY_SYMMETRIC:
1698   case MAT_HERMITIAN:
1699   case MAT_SYMMETRY_ETERNAL:
1700     break;
1701   case MAT_SUBMAT_SINGLEIS:
1702     A->submat_singleis = flg;
1703     break;
1704   case MAT_STRUCTURE_ONLY:
1705     /* The option is handled directly by MatSetOption() */
1706     break;
1707   default:
1708     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
/*
   MatGetRow_MPIAIJ - Returns one (local) row of the matrix with global column
   indices, merging the diagonal-block and off-diagonal-block entries into a
   single list sorted by increasing global column.

   The merge exploits that both blocks store their rows sorted: off-diagonal
   entries with global column < cstart come first, then all diagonal-block
   entries (global columns in [cstart,cend)), then the remaining off-diagonal
   entries.  Results are copied into per-matrix scratch arrays (rowvalues /
   rowindices) sized for the longest row, so only one row may be "active" at a
   time (enforced via getrowactive).

   Input Parameters:
.  matin - the MPIAIJ matrix
.  row   - global row number (must be owned by this process)

   Output Parameters:
.  nz  - number of nonzeros in the row
.  idx - global column indices (optional, pass NULL to skip)
.  v   - values (optional, pass NULL to skip)
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    CHKERRQ(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  CHKERRQ((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  CHKERRQ((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  /* cmap translates compressed off-diagonal column numbers to global columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of off-diagonal entries left of the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already computed in the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  /* hand the block rows back; the merged copies live in mat->rowvalues/rowindices */
  CHKERRQ((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  CHKERRQ((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1789 
1790 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1791 {
1792   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1796   aij->getrowactive = PETSC_FALSE;
1797   PetscFunctionReturn(0);
1798 }
1799 
/*
   MatNorm_MPIAIJ - Computes a matrix norm by walking the raw CSR value arrays
   of both local blocks and reducing across the communicator.

   Supported types: NORM_FROBENIUS (sum of |a_ij|^2, MPI sum), NORM_1 (max
   column sum, requires a dense work array of length cmap->N on every process),
   NORM_INFINITY (max row sum, MPI max).  NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: everything is in the diagonal block */
    CHKERRQ(MatNorm(aij->A,type,norm));
  } else {
    CHKERRQ(MatSeqAIJGetArrayRead(aij->A,&amata));
    CHKERRQ(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      CHKERRMPI(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      CHKERRQ(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* per-global-column partial sums; garray maps compressed B columns to global */
      CHKERRQ(PetscCalloc1(mat->cmap->N+1,&tmp));
      CHKERRQ(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      CHKERRMPI(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      CHKERRQ(PetscFree(tmp));
      CHKERRQ(PetscFree(tmp2));
      CHKERRQ(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* sum |a_ij| across both blocks of local row j */
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      CHKERRMPI(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      CHKERRQ(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    CHKERRQ(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    CHKERRQ(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1869 
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   Preallocation for the new matrix is computed by counting column occurrences
   of the diagonal block (d_nnz) and by reducing, via a PetscSF over the column
   layout, the column occurrences of the off-diagonal block to their owning
   processes (o_nnz).  The diagonal block is then transposed locally in place
   (fast), while off-diagonal entries are inserted with MatSetValues().

   reuse semantics:
.  MAT_INITIAL_MATRIX  - create *matout
.  MAT_REUSE_MATRIX    - fill the preallocated *matout (also handles in-place *matout == A)
.  MAT_INPLACE_MATRIX  - replace A via MatHeaderMerge()
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    CHKERRQ(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    CHKERRQ(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    CHKERRQ(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    CHKERRQ(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    CHKERRQ(PetscSFSetFromOptions(sf));
    CHKERRQ(PetscArrayzero(o_nnz,na));
    CHKERRQ(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    CHKERRQ(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    CHKERRQ(PetscSFDestroy(&sf));

    /* transpose has swapped sizes and block sizes */
    CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B));
    CHKERRQ(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    CHKERRQ(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    CHKERRQ(MatSetType(B,((PetscObject)A)->type_name));
    CHKERRQ(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    CHKERRQ(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    /* reuse must hit only already-allocated locations */
    CHKERRQ(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  CHKERRQ(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  CHKERRQ(PetscMalloc1(bi[mb],&cols));
  CHKERRQ(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed column indices to global before inserting */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of B becomes column (rstart+i) of the transpose: insert as a column */
    CHKERRQ(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  CHKERRQ(PetscFree(cols));
  CHKERRQ(MatSeqAIJRestoreArrayRead(a->B,&bv));

  CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B and discard B's header */
    CHKERRQ(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1957 
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).

   The right scaling of the off-diagonal block needs the off-process entries of
   rr, so the scatter of rr into lvec is started first and its completion is
   deliberately deferred until after the local scaling work (communication/
   computation overlap) - do not reorder these calls.

   Input Parameters:
.  ll - left scaling vector (or NULL for no left scaling)
.  rr - right scaling vector (or NULL for no right scaling)
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  CHKERRQ(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    CHKERRQ(VecGetLocalSize(rr,&s1));
    PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    CHKERRQ(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    CHKERRQ(VecGetLocalSize(ll,&s1));
    PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling only, for the off-diagonal block (rows are local) */
    CHKERRQ((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale  the diagonal block */
  CHKERRQ((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    CHKERRQ(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    CHKERRQ((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}
1987 
1988 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1989 {
1990   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1991 
1992   PetscFunctionBegin;
1993   CHKERRQ(MatSetUnfactored(a->A));
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1998 {
1999   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2000   Mat            a,b,c,d;
2001   PetscBool      flg;
2002 
2003   PetscFunctionBegin;
2004   a = matA->A; b = matA->B;
2005   c = matB->A; d = matB->B;
2006 
2007   CHKERRQ(MatEqual(a,c,&flg));
2008   if (flg) {
2009     CHKERRQ(MatEqual(b,d,&flg));
2010   }
2011   CHKERRMPI(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2016 {
2017   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2018   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2019 
2020   PetscFunctionBegin;
2021   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2022   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2023     /* because of the column compression in the off-processor part of the matrix a->B,
2024        the number of columns in a->B and b->B may be different, hence we cannot call
2025        the MatCopy() directly on the two parts. If need be, we can provide a more
2026        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2027        then copying the submatrices */
2028     CHKERRQ(MatCopy_Basic(A,B,str));
2029   } else {
2030     CHKERRQ(MatCopy(a->A,b->A,str));
2031     CHKERRQ(MatCopy(a->B,b->B,str));
2032   }
2033   CHKERRQ(PetscObjectStateIncrease((PetscObject)B));
2034   PetscFunctionReturn(0);
2035 }
2036 
/*
   MatSetUp_MPIAIJ - Default setup when the user never preallocated:
   delegates to MatMPIAIJSetPreallocation() with PETSc's default estimates.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  CHKERRQ(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2043 
2044 /*
2045    Computes the number of nonzeros per row needed for preallocation when X and Y
2046    have different nonzero structure.
2047 */
2048 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2049 {
2050   PetscInt       i,j,k,nzx,nzy;
2051 
2052   PetscFunctionBegin;
2053   /* Set the number of nonzeros in the new matrix */
2054   for (i=0; i<m; i++) {
2055     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2056     nzx = xi[i+1] - xi[i];
2057     nzy = yi[i+1] - yi[i];
2058     nnz[i] = 0;
2059     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2060       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2061       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2062       nnz[i]++;
2063     }
2064     for (; k<nzy; k++) nnz[i]++;
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2070 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2071 {
2072   PetscInt       m = Y->rmap->N;
2073   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2074   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2075 
2076   PetscFunctionBegin;
2077   CHKERRQ(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2082 {
2083   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2084 
2085   PetscFunctionBegin;
2086   if (str == SAME_NONZERO_PATTERN) {
2087     CHKERRQ(MatAXPY(yy->A,a,xx->A,str));
2088     CHKERRQ(MatAXPY(yy->B,a,xx->B,str));
2089   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2090     CHKERRQ(MatAXPY_Basic(Y,a,X,str));
2091   } else {
2092     Mat      B;
2093     PetscInt *nnz_d,*nnz_o;
2094 
2095     CHKERRQ(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2096     CHKERRQ(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2097     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2098     CHKERRQ(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2099     CHKERRQ(MatSetLayouts(B,Y->rmap,Y->cmap));
2100     CHKERRQ(MatSetType(B,((PetscObject)Y)->type_name));
2101     CHKERRQ(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2102     CHKERRQ(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2103     CHKERRQ(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2104     CHKERRQ(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2105     CHKERRQ(MatHeaderMerge(Y,&B));
2106     CHKERRQ(PetscFree(nnz_d));
2107     CHKERRQ(PetscFree(nnz_o));
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2113 
2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2115 {
2116   PetscFunctionBegin;
2117   if (PetscDefined(USE_COMPLEX)) {
2118     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2119 
2120     CHKERRQ(MatConjugate_SeqAIJ(aij->A));
2121     CHKERRQ(MatConjugate_SeqAIJ(aij->B));
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2127 {
2128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2129 
2130   PetscFunctionBegin;
2131   CHKERRQ(MatRealPart(a->A));
2132   CHKERRQ(MatRealPart(a->B));
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2137 {
2138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2139 
2140   PetscFunctionBegin;
2141   CHKERRQ(MatImaginaryPart(a->A));
2142   CHKERRQ(MatImaginaryPart(a->B));
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2147 {
2148   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2149   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2150   PetscScalar       *va,*vv;
2151   Vec               vB,vA;
2152   const PetscScalar *vb;
2153 
2154   PetscFunctionBegin;
2155   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2156   CHKERRQ(MatGetRowMaxAbs(a->A,vA,idx));
2157 
2158   CHKERRQ(VecGetArrayWrite(vA,&va));
2159   if (idx) {
2160     for (i=0; i<m; i++) {
2161       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2162     }
2163   }
2164 
2165   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2166   CHKERRQ(PetscMalloc1(m,&idxb));
2167   CHKERRQ(MatGetRowMaxAbs(a->B,vB,idxb));
2168 
2169   CHKERRQ(VecGetArrayWrite(v,&vv));
2170   CHKERRQ(VecGetArrayRead(vB,&vb));
2171   for (i=0; i<m; i++) {
2172     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2173       vv[i] = vb[i];
2174       if (idx) idx[i] = a->garray[idxb[i]];
2175     } else {
2176       vv[i] = va[i];
2177       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2178         idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   CHKERRQ(VecRestoreArrayWrite(vA,&vv));
2182   CHKERRQ(VecRestoreArrayWrite(vA,&va));
2183   CHKERRQ(VecRestoreArrayRead(vB,&vb));
2184   CHKERRQ(PetscFree(idxb));
2185   CHKERRQ(VecDestroy(&vA));
2186   CHKERRQ(VecDestroy(&vB));
2187   PetscFunctionReturn(0);
2188 }
2189 
/*
   MatGetRowMinAbs_MPIAIJ - For each locally owned row, computes the entry of
   smallest absolute value and (optionally) its global column index.

   The parallel matrix is stored as a local "diagonal" block mat->A (columns
   owned by this process) plus an "off-diagonal" block mat->B whose columns are
   compressed; mat->garray maps B's compressed column numbers to global column
   numbers.  Global columns absent from B's compressed set are implicit 0.0
   entries and must compete for the minimum, which is why the code searches for
   the first "hole" in the compressed column map below.

   Input Parameters:
+  A - the MPIAIJ matrix
-  idx - optional array (length = local rows) for the global column of each minimum, or NULL

   Output Parameter:
.  v - vector (local length = local rows) of row-wise minimum absolute values
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;               /* local row and column counts */
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* global range of locally owned columns */
  PetscInt          *cmap  = mat->garray;                        /* B compressed column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;                       /* per-row argmin for A-part and B-part */
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate directly to the SeqAIJ routine */
    CHKERRQ(VecGetArrayWrite(v,&diagA));
    CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    CHKERRQ(MatGetRowMinAbs(mat->A,diagV,idx));
    CHKERRQ(VecDestroy(&diagV));
    CHKERRQ(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report 0.0 with no column index */
    if (m) {
      CHKERRQ(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      CHKERRQ(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  CHKERRQ(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  CHKERRQ(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  CHKERRQ(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;   /* running cursor over B's values, advanced row by row below */
  bi   = b->i;
  bj   = b->j;  /* running cursor over B's compressed column indices */
  CHKERRQ(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* no implicit zeros: seed the minimum with the first explicit entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: some off-diagonal column is an implicit 0.0, so the row minimum in absolute value starts at 0.0 (original comment said "maximum" -- copy-paste from the row-max routine) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): j counts explicit off-diagonal entries seen so far and is
         compared against cstart as if it were a global column position; this
         relies on B's columns being sorted ascending -- confirm */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first hole is column 0 (or just past the diagonal range if we own column 0) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned (diagonal) column range */
        }
      }
    }

    /* scan the explicit entries of this row, advancing the global ba/bj cursors */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  CHKERRQ(VecGetArrayWrite(v, &a));
  CHKERRQ(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to mat->A; shift to global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  CHKERRQ(MatSeqAIJRestoreArrayRead(B,&bav));
  CHKERRQ(VecRestoreArrayWrite(v, &a));
  CHKERRQ(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  CHKERRQ(VecRestoreArrayWrite(offdiagV, &offdiagA));
  CHKERRQ(VecDestroy(&diagV));
  CHKERRQ(VecDestroy(&offdiagV));
  CHKERRQ(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2297 
/*
   MatGetRowMin_MPIAIJ - For each locally owned row, computes the minimum entry
   (compared by real part) and (optionally) its global column index.

   Structure mirrors MatGetRowMinAbs_MPIAIJ: the local diagonal block mat->A is
   handled by the SeqAIJ routine, while the compressed off-diagonal block mat->B
   is scanned by hand.  Columns missing from B's compressed set are implicit
   0.0 entries, so a sparse off-diagonal row can never have a minimum above 0.0.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;               /* local row and column counts */
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* global range of locally owned columns */
  PetscInt          *cmap  = mat->garray;                        /* B compressed column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;                       /* per-row argmin for A-part and B-part */
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate directly to the SeqAIJ routine */
    CHKERRQ(VecGetArrayWrite(v,&diagA));
    CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    CHKERRQ(MatGetRowMin(mat->A,diagV,idx));
    CHKERRQ(VecDestroy(&diagV));
    CHKERRQ(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity for min with no column index */
    if (m) {
      CHKERRQ(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      CHKERRQ(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  CHKERRQ(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  CHKERRQ(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  CHKERRQ(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;   /* running cursor over B's values, advanced row by row below */
  bi   = b->i;
  bj   = b->j;  /* running cursor over B's compressed column indices */
  CHKERRQ(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* no implicit zeros: seed the minimum with the first explicit entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: some off-diagonal column is an implicit 0.0, so the row minimum is 0.0 or lower (original comment said "maximum" -- copy-paste from the row-max routine) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): j counts explicit off-diagonal entries seen so far and is
         compared against cstart as if it were a global column position; this
         relies on B's columns being sorted ascending -- confirm */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first hole is column 0 (or just past the diagonal range if we own column 0) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned (diagonal) column range */
        }
      }
    }

    /* scan the explicit entries of this row, advancing the global ba/bj cursors */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  CHKERRQ(VecGetArrayWrite(v, &a));
  CHKERRQ(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to mat->A; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  CHKERRQ(MatSeqAIJRestoreArrayRead(B,&bav));
  CHKERRQ(VecRestoreArrayWrite(v, &a));
  CHKERRQ(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  CHKERRQ(VecRestoreArrayWrite(offdiagV, &offdiagA));
  CHKERRQ(VecDestroy(&diagV));
  CHKERRQ(VecDestroy(&offdiagV));
  CHKERRQ(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2405 
/*
   MatGetRowMax_MPIAIJ - For each locally owned row, computes the maximum entry
   (compared by real part) and (optionally) its global column index.

   Structure mirrors MatGetRowMin_MPIAIJ with the comparison direction flipped:
   the local diagonal block mat->A is handled by the SeqAIJ routine, while the
   compressed off-diagonal block mat->B is scanned by hand.  Columns missing
   from B's compressed set are implicit 0.0 entries, so a sparse off-diagonal
   row always has maximum >= 0.0.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;               /* local row and column counts */
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* global range of locally owned columns */
  PetscInt          *cmap  = mat->garray;                        /* B compressed column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;                       /* per-row argmax for A-part and B-part */
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate directly to the SeqAIJ routine */
    CHKERRQ(VecGetArrayWrite(v,&diagA));
    CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    CHKERRQ(MatGetRowMax(mat->A,diagV,idx));
    CHKERRQ(VecDestroy(&diagV));
    CHKERRQ(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity for max with no column index */
    if (m) {
      CHKERRQ(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      CHKERRQ(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  CHKERRQ(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  CHKERRQ(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  CHKERRQ(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  CHKERRQ(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;   /* running cursor over B's values, advanced row by row below */
  bi   = b->i;
  bj   = b->j;  /* running cursor over B's compressed column indices */
  CHKERRQ(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* no implicit zeros: seed the maximum with the first explicit entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): j counts explicit off-diagonal entries seen so far and is
         compared against cstart as if it were a global column position; this
         relies on B's columns being sorted ascending -- confirm */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first hole is column 0 (or just past the diagonal range if we own column 0) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned (diagonal) column range */
        }
      }
    }

    /* scan the explicit entries of this row, advancing the global ba/bj cursors */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine diagonal-block and off-diagonal-block maxima; ties go to the smaller global column */
  CHKERRQ(VecGetArrayWrite(v,    &a));
  CHKERRQ(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to mat->A; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  CHKERRQ(MatSeqAIJRestoreArrayRead(B,&bav));
  CHKERRQ(VecRestoreArrayWrite(v,       &a));
  CHKERRQ(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  CHKERRQ(VecRestoreArrayWrite(offdiagV,&offdiagA));
  CHKERRQ(VecDestroy(&diagV));
  CHKERRQ(VecDestroy(&offdiagV));
  CHKERRQ(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2513 
2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2515 {
2516   Mat            *dummy;
2517 
2518   PetscFunctionBegin;
2519   CHKERRQ(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2520   *newmat = *dummy;
2521   CHKERRQ(PetscFree(dummy));
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2526 {
2527   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2528 
2529   PetscFunctionBegin;
2530   CHKERRQ(MatInvertBlockDiagonal(a->A,values));
2531   A->factorerrortype = a->A->factorerrortype;
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2536 {
2537   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2538 
2539   PetscFunctionBegin;
2540   PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2541   CHKERRQ(MatSetRandom(aij->A,rctx));
2542   if (x->assembled) {
2543     CHKERRQ(MatSetRandom(aij->B,rctx));
2544   } else {
2545     CHKERRQ(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2546   }
2547   CHKERRQ(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2548   CHKERRQ(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2553 {
2554   PetscFunctionBegin;
2555   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2556   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2557   PetscFunctionReturn(0);
2558 }
2559 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Options Database Keys:
.    -mat_increase_overlap_scalable - use the scalable algorithm

   Note:
   Silently does nothing for matrix types that do not provide this method.

   Level: advanced

.seealso: MatIncreaseOverlap()
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the type registered one */
  CHKERRQ(PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)));
  PetscFunctionReturn(0);
}
2578 
2579 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2580 {
2581   PetscBool            sc = PETSC_FALSE,flg;
2582 
2583   PetscFunctionBegin;
2584   CHKERRQ(PetscOptionsHead(PetscOptionsObject,"MPIAIJ options"));
2585   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2586   CHKERRQ(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2587   if (flg) {
2588     CHKERRQ(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2589   }
2590   CHKERRQ(PetscOptionsTail());
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2595 {
2596   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2597   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2598 
2599   PetscFunctionBegin;
2600   if (!Y->preallocated) {
2601     CHKERRQ(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2602   } else if (!aij->nz) {
2603     PetscInt nonew = aij->nonew;
2604     CHKERRQ(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2605     aij->nonew = nonew;
2606   }
2607   CHKERRQ(MatShift_Basic(Y,a));
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2612 {
2613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2614 
2615   PetscFunctionBegin;
2616   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2617   CHKERRQ(MatMissingDiagonal(a->A,missing,d));
2618   if (d) {
2619     PetscInt rstart;
2620     CHKERRQ(MatGetOwnershipRange(A,&rstart,NULL));
2621     *d += rstart;
2622 
2623   }
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2628 {
2629   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2630 
2631   PetscFunctionBegin;
2632   CHKERRQ(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 /* -------------------------------------------------------------------*/
2637 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2638                                        MatGetRow_MPIAIJ,
2639                                        MatRestoreRow_MPIAIJ,
2640                                        MatMult_MPIAIJ,
2641                                 /* 4*/ MatMultAdd_MPIAIJ,
2642                                        MatMultTranspose_MPIAIJ,
2643                                        MatMultTransposeAdd_MPIAIJ,
2644                                        NULL,
2645                                        NULL,
2646                                        NULL,
2647                                 /*10*/ NULL,
2648                                        NULL,
2649                                        NULL,
2650                                        MatSOR_MPIAIJ,
2651                                        MatTranspose_MPIAIJ,
2652                                 /*15*/ MatGetInfo_MPIAIJ,
2653                                        MatEqual_MPIAIJ,
2654                                        MatGetDiagonal_MPIAIJ,
2655                                        MatDiagonalScale_MPIAIJ,
2656                                        MatNorm_MPIAIJ,
2657                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2658                                        MatAssemblyEnd_MPIAIJ,
2659                                        MatSetOption_MPIAIJ,
2660                                        MatZeroEntries_MPIAIJ,
2661                                 /*24*/ MatZeroRows_MPIAIJ,
2662                                        NULL,
2663                                        NULL,
2664                                        NULL,
2665                                        NULL,
2666                                 /*29*/ MatSetUp_MPIAIJ,
2667                                        NULL,
2668                                        NULL,
2669                                        MatGetDiagonalBlock_MPIAIJ,
2670                                        NULL,
2671                                 /*34*/ MatDuplicate_MPIAIJ,
2672                                        NULL,
2673                                        NULL,
2674                                        NULL,
2675                                        NULL,
2676                                 /*39*/ MatAXPY_MPIAIJ,
2677                                        MatCreateSubMatrices_MPIAIJ,
2678                                        MatIncreaseOverlap_MPIAIJ,
2679                                        MatGetValues_MPIAIJ,
2680                                        MatCopy_MPIAIJ,
2681                                 /*44*/ MatGetRowMax_MPIAIJ,
2682                                        MatScale_MPIAIJ,
2683                                        MatShift_MPIAIJ,
2684                                        MatDiagonalSet_MPIAIJ,
2685                                        MatZeroRowsColumns_MPIAIJ,
2686                                 /*49*/ MatSetRandom_MPIAIJ,
2687                                        NULL,
2688                                        NULL,
2689                                        NULL,
2690                                        NULL,
2691                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2692                                        NULL,
2693                                        MatSetUnfactored_MPIAIJ,
2694                                        MatPermute_MPIAIJ,
2695                                        NULL,
2696                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2697                                        MatDestroy_MPIAIJ,
2698                                        MatView_MPIAIJ,
2699                                        NULL,
2700                                        NULL,
2701                                 /*64*/ NULL,
2702                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2703                                        NULL,
2704                                        NULL,
2705                                        NULL,
2706                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2707                                        MatGetRowMinAbs_MPIAIJ,
2708                                        NULL,
2709                                        NULL,
2710                                        NULL,
2711                                        NULL,
2712                                 /*75*/ MatFDColoringApply_AIJ,
2713                                        MatSetFromOptions_MPIAIJ,
2714                                        NULL,
2715                                        NULL,
2716                                        MatFindZeroDiagonals_MPIAIJ,
2717                                 /*80*/ NULL,
2718                                        NULL,
2719                                        NULL,
2720                                 /*83*/ MatLoad_MPIAIJ,
2721                                        MatIsSymmetric_MPIAIJ,
2722                                        NULL,
2723                                        NULL,
2724                                        NULL,
2725                                        NULL,
2726                                 /*89*/ NULL,
2727                                        NULL,
2728                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2729                                        NULL,
2730                                        NULL,
2731                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2732                                        NULL,
2733                                        NULL,
2734                                        NULL,
2735                                        MatBindToCPU_MPIAIJ,
2736                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2737                                        NULL,
2738                                        NULL,
2739                                        MatConjugate_MPIAIJ,
2740                                        NULL,
2741                                 /*104*/MatSetValuesRow_MPIAIJ,
2742                                        MatRealPart_MPIAIJ,
2743                                        MatImaginaryPart_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                 /*109*/NULL,
2747                                        NULL,
2748                                        MatGetRowMin_MPIAIJ,
2749                                        NULL,
2750                                        MatMissingDiagonal_MPIAIJ,
2751                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2752                                        NULL,
2753                                        MatGetGhosts_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                        NULL,
2759                                        NULL,
2760                                        MatGetMultiProcBlock_MPIAIJ,
2761                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2762                                        MatGetColumnReductions_MPIAIJ,
2763                                        MatInvertBlockDiagonal_MPIAIJ,
2764                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2765                                        MatCreateSubMatricesMPI_MPIAIJ,
2766                                 /*129*/NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2770                                        NULL,
2771                                 /*134*/NULL,
2772                                        NULL,
2773                                        NULL,
2774                                        NULL,
2775                                        NULL,
2776                                 /*139*/MatSetBlockSizes_MPIAIJ,
2777                                        NULL,
2778                                        NULL,
2779                                        MatFDColoringSetUp_MPIXAIJ,
2780                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2781                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2782                                 /*145*/NULL,
2783                                        NULL,
2784                                        NULL
2785 };
2786 
2787 /* ----------------------------------------------------------------------------------------*/
2788 
2789 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2790 {
2791   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2792 
2793   PetscFunctionBegin;
2794   CHKERRQ(MatStoreValues(aij->A));
2795   CHKERRQ(MatStoreValues(aij->B));
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2800 {
2801   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2802 
2803   PetscFunctionBegin;
2804   CHKERRQ(MatRetrieveValues(aij->A));
2805   CHKERRQ(MatRetrieveValues(aij->B));
2806   PetscFunctionReturn(0);
2807 }
2808 
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Type-specific implementation of
   MatMPIAIJSetPreallocation(): sets up the layouts, tears down any stale
   communication/mapping state, (re)creates the off-diagonal block, and
   preallocates both local blocks.

   Input Parameters:
+  B     - the MPIAIJ matrix
.  d_nz  - nonzeros per row of the diagonal block (used when d_nnz is NULL)
.  d_nnz - optional per-row nonzero counts for the diagonal block
.  o_nz  - nonzeros per row of the off-diagonal block (used when o_nnz is NULL)
-  o_nnz - optional per-row nonzero counts for the off-diagonal block
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  CHKERRQ(PetscLayoutSetUp(B->rmap));
  CHKERRQ(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard any previous column map and communication machinery; they
     are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  CHKERRQ(PetscTableDestroy(&b->colmap));
#else
  CHKERRQ(PetscFree(b->colmap));
#endif
  CHKERRQ(PetscFree(b->garray));
  CHKERRQ(VecDestroy(&b->lvec));
  CHKERRQ(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  CHKERRQ(MatDestroy(&b->B));
  CHKERRQ(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single process there is no off-diagonal part, so give it zero columns */
  CHKERRQ(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  CHKERRQ(MatSetBlockSizesFromMats(b->B,B,B));
  CHKERRQ(MatSetType(b->B,MATSEQAIJ));
  CHKERRQ(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes across preallocation calls, so only create it once */
  if (!B->preallocated) {
    CHKERRQ(MatCreate(PETSC_COMM_SELF,&b->A));
    CHKERRQ(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    CHKERRQ(MatSetBlockSizesFromMats(b->A,B,B));
    CHKERRQ(MatSetType(b->A,MATSEQAIJ));
    CHKERRQ(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  CHKERRQ(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  CHKERRQ(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2852 
/*
   MatResetPreallocation_MPIAIJ - Restores the matrix to its state just after
   preallocation: clears the column map and communication machinery and resets
   the preallocation of both local blocks (keeping their current nonzero
   structure as the preallocation pattern).
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  CHKERRQ(PetscLayoutSetUp(B->rmap));
  CHKERRQ(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the column map and scatter context; they are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  CHKERRQ(PetscTableDestroy(&b->colmap));
#else
  CHKERRQ(PetscFree(b->colmap));
#endif
  CHKERRQ(PetscFree(b->garray));
  CHKERRQ(VecDestroy(&b->lvec));
  CHKERRQ(VecScatterDestroy(&b->Mvctx));

  CHKERRQ(MatResetPreallocation(b->A));
  CHKERRQ(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2879 
/*
   MatDuplicate_MPIAIJ - Creates a new matrix with the same layout (and,
   depending on cpvalues, the same values) as matin.

   Copies the scalar fields of the Mat_MPIAIJ structure, deep-copies the
   column map and garray, duplicates the local blocks, and copies the
   scatter context when it exists.

   Input Parameters:
+  matin    - matrix to duplicate
-  cpvalues - MAT_COPY_VALUES, MAT_DO_NOT_COPY_VALUES, or MAT_SHARE_NONZERO_PATTERN

   Output Parameter:
.  newmat - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  CHKERRQ(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  CHKERRQ(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  CHKERRQ(MatSetBlockSizesFromMats(mat,matin,matin));
  CHKERRQ(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-use MatGetRow() scratch state is not copied; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* layouts are reference-counted, not copied */
  CHKERRQ(PetscLayoutReference(matin->rmap,&mat->rmap));
  CHKERRQ(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* deep-copy the global-to-local column map of the off-diagonal block */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    CHKERRQ(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    CHKERRQ(PetscMalloc1(mat->cmap->N,&a->colmap));
    CHKERRQ(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    CHKERRQ(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the compressed-column -> global-column map */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    CHKERRQ(PetscMalloc1(len+1,&a->garray));
    CHKERRQ(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) CHKERRQ(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    CHKERRQ(VecDuplicate(oldmat->lvec,&a->lvec));
    CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    CHKERRQ(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  CHKERRQ(MatDuplicate(oldmat->A,cpvalues,&a->A));
  CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  CHKERRQ(MatDuplicate(oldmat->B,cpvalues,&a->B));
  CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  /* carry over any composed functions (e.g. type-specific setters) */
  CHKERRQ(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2945 
2946 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2947 {
2948   PetscBool      isbinary, ishdf5;
2949 
2950   PetscFunctionBegin;
2951   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2952   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2953   /* force binary viewer to load .info file if it has not yet done so */
2954   CHKERRQ(PetscViewerSetUp(viewer));
2955   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2956   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2957   if (isbinary) {
2958     CHKERRQ(MatLoad_MPIAIJ_Binary(newMat,viewer));
2959   } else if (ishdf5) {
2960 #if defined(PETSC_HAVE_HDF5)
2961     CHKERRQ(MatLoad_AIJ_HDF5(newMat,viewer));
2962 #else
2963     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2964 #endif
2965   } else {
2966     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2967   }
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2972 {
2973   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2974   PetscInt       *rowidxs,*colidxs;
2975   PetscScalar    *matvals;
2976 
2977   PetscFunctionBegin;
2978   CHKERRQ(PetscViewerSetUp(viewer));
2979 
2980   /* read in matrix header */
2981   CHKERRQ(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
2982   PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2983   M  = header[1]; N = header[2]; nz = header[3];
2984   PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
2985   PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
2986   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2987 
2988   /* set block sizes from the viewer's .info file */
2989   CHKERRQ(MatLoad_Binary_BlockSizes(mat,viewer));
2990   /* set global sizes if not set already */
2991   if (mat->rmap->N < 0) mat->rmap->N = M;
2992   if (mat->cmap->N < 0) mat->cmap->N = N;
2993   CHKERRQ(PetscLayoutSetUp(mat->rmap));
2994   CHKERRQ(PetscLayoutSetUp(mat->cmap));
2995 
2996   /* check if the matrix sizes are correct */
2997   CHKERRQ(MatGetSize(mat,&rows,&cols));
2998   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
2999 
3000   /* read in row lengths and build row indices */
3001   CHKERRQ(MatGetLocalSize(mat,&m,NULL));
3002   CHKERRQ(PetscMalloc1(m+1,&rowidxs));
3003   CHKERRQ(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3004   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3005   CHKERRMPI(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3006   PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3007   /* read in column indices and matrix values */
3008   CHKERRQ(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3009   CHKERRQ(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3010   CHKERRQ(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3011   /* store matrix indices and values */
3012   CHKERRQ(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3013   CHKERRQ(PetscFree(rowidxs));
3014   CHKERRQ(PetscFree2(colidxs,matvals));
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 /* Not scalable because of ISAllGather() unless getting all columns. */
3019 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3020 {
3021   IS             iscol_local;
3022   PetscBool      isstride;
3023   PetscMPIInt    lisstride=0,gisstride;
3024 
3025   PetscFunctionBegin;
3026   /* check if we are grabbing all columns*/
3027   CHKERRQ(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3028 
3029   if (isstride) {
3030     PetscInt  start,len,mstart,mlen;
3031     CHKERRQ(ISStrideGetInfo(iscol,&start,NULL));
3032     CHKERRQ(ISGetLocalSize(iscol,&len));
3033     CHKERRQ(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3034     if (mstart == start && mlen-mstart == len) lisstride = 1;
3035   }
3036 
3037   CHKERRMPI(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3038   if (gisstride) {
3039     PetscInt N;
3040     CHKERRQ(MatGetSize(mat,NULL,&N));
3041     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3042     CHKERRQ(ISSetIdentity(iscol_local));
3043     CHKERRQ(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3044   } else {
3045     PetscInt cbs;
3046     CHKERRQ(ISGetBlockSize(iscol,&cbs));
3047     CHKERRQ(ISAllGather(iscol,&iscol_local));
3048     CHKERRQ(ISSetBlockSize(iscol_local,cbs));
3049   }
3050 
3051   *isseq = iscol_local;
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 /*
3056  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3057  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3058 
3059  Input Parameters:
3060    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3063    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3064            i.e., mat->cstart <= iscol[i] < mat->cend
3065  Output Parameter:
3066    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3067    iscol_o - sequential column index set for retrieving mat->B
3068    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3069  */
/* Builds the sequential index sets needed by MatCreateSubMatrix_MPIAIJ_SameRowColDist();
   the meaning of each output is documented in the comment block above. Selected columns
   are located by scattering marker vectors through the matrix's own Mvctx. */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));
  CHKERRQ(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     entries left at -1 mark columns not selected by iscol */
  CHKERRQ(MatCreateVecs(mat,&x,NULL));
  CHKERRQ(VecSet(x,-1.0));
  CHKERRQ(VecDuplicate(x,&cmap));
  CHKERRQ(VecSet(cmap,-1.0));

  /* isstart = global offset of this rank's entries within the concatenated iscol */
  CHKERRMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  CHKERRQ(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  CHKERRQ(ISGetIndices(iscol,&is_idx));
  CHKERRQ(VecGetArray(x,&xarray));
  CHKERRQ(VecGetArray(cmap,&cmaparray));
  CHKERRQ(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  CHKERRQ(VecRestoreArray(x,&xarray));
  CHKERRQ(VecRestoreArray(cmap,&cmaparray));
  CHKERRQ(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; ownership of idx passes to the IS */
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  CHKERRQ(ISGetBlockSize(iscol,&i)); /* i reused here to carry the block size */
  CHKERRQ(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: local row indices relative to this rank's row start */
  CHKERRQ(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  CHKERRQ(PetscMalloc1(m,&idx));
  CHKERRQ(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  CHKERRQ(ISRestoreIndices(isrow,&is_idx));

  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  CHKERRQ(ISGetBlockSize(isrow,&i)); /* i reused again for the row block size */
  CHKERRQ(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  CHKERRQ(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  CHKERRQ(VecDuplicate(lvec,&lcmap));

  CHKERRQ(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  CHKERRQ(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  CHKERRQ(PetscMalloc1(Bn,&idx));
  CHKERRQ(PetscMalloc1(Bn,&cmap1));

  CHKERRQ(VecGetArray(lvec,&xarray));
  CHKERRQ(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    /* xarray[i] > -1 means B's column i was marked selected by some rank's iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  CHKERRQ(VecRestoreArray(lvec,&xarray));
  CHKERRQ(VecRestoreArray(lcmap,&cmaparray));

  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  CHKERRQ(PetscFree(idx));
  /* cmap1 is returned to the caller, who is responsible for freeing it
     (only the first `count` entries are meaningful; the array holds Bn slots) */
  *garray = cmap1;

  CHKERRQ(VecDestroy(&x));
  CHKERRQ(VecDestroy(&cmap));
  CHKERRQ(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3166 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
   On MAT_INITIAL_MATRIX the sequential index sets isrow_d/iscol_d/iscol_o are built,
   used to extract the diagonal (A) and off-diagonal (B) pieces, and then composed on
   the result so a later MAT_REUSE_MATRIX call can replay the extraction cheaply.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    CHKERRQ(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    CHKERRQ(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    CHKERRQ(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    CHKERRQ(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    CHKERRQ(ISGetLocalSize(iscol_o,&n));
    if (n) {
      CHKERRQ(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    CHKERRQ(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    CHKERRQ(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    CHKERRQ(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    CHKERRQ(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    CHKERRQ(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; M takes over Asub and destroys Bsub (see MatCreateMPIAIJWithSeqAIJ docs) */
    CHKERRQ(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    CHKERRQ(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      CHKERRQ(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* merge-walk subgarray (condensed) against garray (original) to keep only
         iscol_o entries whose columns survived the condensation */
      CHKERRQ(PetscMalloc1(n,&idx_new));
      j = 0;
      CHKERRQ(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      CHKERRQ(ISRestoreIndices(iscol_o,&idx));

      CHKERRQ(ISDestroy(&iscol_o));
      CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    CHKERRQ(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       composing keeps a reference, so our local references can be dropped */
    CHKERRQ(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    CHKERRQ(ISDestroy(&isrow_d));

    CHKERRQ(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    CHKERRQ(ISDestroy(&iscol_d));

    CHKERRQ(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    CHKERRQ(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3261 
/*
   MatCreateSubMatrix_MPIAIJ - extracts the parallel submatrix mat[isrow,iscol],
   choosing among three implementations:
     - SameRowColDist: both isrow and iscol lie within each rank's own row/column range;
     - SameRowDist:    only isrow does (requires the gathered iscol_local to be sorted);
     - nonscalable:    general fallback that gathers iscol onto every process.
   On MAT_REUSE_MATRIX the index sets composed on *newmat by the initial call
   identify which path was taken so the same path is replayed.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    CHKERRQ(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* min/max of the local indices must both lie in this rank's ownership range */
      CHKERRQ(ISGetMinMax(isrow,&i,&j));
      CHKERRQ(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    CHKERRQ(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      CHKERRQ(ISGetMinMax(iscol,&i,&j));
      CHKERRQ(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all ranks must agree on the distribution properties (logical AND) */
    CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));
    CHKERRMPI(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      CHKERRQ(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        CHKERRQ(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        CHKERRQ(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        CHKERRQ(ISGetSize(iscol,&i));
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        CHKERRQ(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          CHKERRQ(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below, which reuses it */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          CHKERRQ(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      CHKERRQ(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  CHKERRQ(ISGetLocalSize(iscol,&csize));
  CHKERRQ(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the result for future MAT_REUSE_MATRIX calls */
    CHKERRQ(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    CHKERRQ(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3365 
3366 /*@C
   MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3368          and "off-diagonal" part of the matrix in CSR format.
3369 
3370    Collective
3371 
3372    Input Parameters:
3373 +  comm - MPI communicator
3374 .  A - "diagonal" portion of matrix
3375 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3376 -  garray - global index of B columns
3377 
3378    Output Parameter:
3379 .   mat - the matrix, with input A as its local diagonal matrix
3380    Level: advanced
3381 
3382    Notes:
3383        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3384        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3385 
3386 .seealso: MatCreateMPIAIJWithSplitArrays()
3387 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;  /* oi/oj: B's CSR row pointers and column indices */
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  CHKERRQ(MatCreate(comm,mat));
  CHKERRQ(MatGetSize(A,&m,&n));
  /* A and B must have the same number of (local) rows */
  PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  CHKERRMPI(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  CHKERRQ(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  CHKERRQ(MatSetType(*mat,MATMPIAIJ));
  CHKERRQ(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  CHKERRQ(PetscLayoutSetUp((*mat)->rmap));
  CHKERRQ(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; *mat now owns A */
  maij->A = A;

  /* translate B's compressed local column indices to global ones, in place, via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps (shares) B's i/j/a arrays */
  CHKERRQ(MatSeqAIJGetArrayRead(B,&oa));
  CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  CHKERRQ(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* hand ownership of the shared arrays from B to Bnew before destroying B,
     so MatDestroy(&B) does not free them */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  CHKERRQ(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  CHKERRQ(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  CHKERRQ(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  CHKERRQ(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3454 
3455 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3456 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow,iscol] when isrow lies
   within each rank's own row range but iscol need not match mat's column distribution.

   iscol_local: sequential IS with the gathered global column indices of iscol; the
   caller guarantees it is sorted (see MatCreateSubMatrix_MPIAIJ). It is only used
   when call == MAT_INITIAL_MATRIX; for MAT_REUSE_MATRIX pass NULL — the needed
   objects are recovered from what was composed on *newmat by the initial call.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the objects stashed on *newmat by the MAT_INITIAL_MATRIX call */
    CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    CHKERRQ(ISGetLocalSize(iscol_sub,&count));

    CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    CHKERRQ(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    CHKERRQ(ISGetLocalSize(iscol,&n));
    CHKERRQ(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    CHKERRQ(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      CHKERRQ(PetscObjectReference((PetscObject)iscol_local));
      CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      CHKERRQ(PetscMalloc1(Ncols,&idx));
      CHKERRQ(PetscMalloc1(Ncols,&cmap1));
      CHKERRQ(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      /* keep only the requested columns present on this rank: either in the
         diagonal range [cstart,cend) or matching an entry of garray (merge-walk,
         valid because both iscol_local and garray are sorted) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      CHKERRQ(ISRestoreIndices(iscol_local,&is_idx));

      CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      CHKERRQ(ISGetBlockSize(iscol,&cbs));
      CHKERRQ(ISSetBlockSize(iscol_sub,cbs));

      CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    CHKERRQ(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  CHKERRQ(ISGetLocalSize(iscol_sub,&count));
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  CHKERRQ(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  CHKERRQ(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    CHKERRMPI(MPI_Comm_size(comm,&size));
    CHKERRMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    CHKERRQ(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      CHKERRQ(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    CHKERRMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    CHKERRQ(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    CHKERRQ(ISGetBlockSize(isrow,&bs));
    CHKERRQ(ISGetBlockSize(iscol,&cbs));

    CHKERRQ(MatCreate(comm,&M));
    CHKERRQ(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    CHKERRQ(MatSetBlockSizes(M,bs,cbs));
    CHKERRQ(MatSetType(M,((PetscObject)mat)->type_name));
    CHKERRQ(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    CHKERRQ(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    CHKERRQ(MatGetLocalSize(M,&i,NULL));
    PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    CHKERRQ(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  CHKERRQ(PetscMalloc1(count,&colsub));
  CHKERRQ(MatGetOwnershipRange(M,&rstart,NULL));

  jj   = aij->j;
  CHKERRQ(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* translate Msub's local column indices to submatrix-global ones via cmap */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    CHKERRQ(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  /* NOTE(review): aa has been advanced past the start of the array by the loop above
     when it is restored here — presumably harmless, but confirm against
     MatSeqAIJRestoreArrayRead()'s contract */
  CHKERRQ(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  CHKERRQ(ISRestoreIndices(iscmap,&cmap));

  CHKERRQ(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  CHKERRQ(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    CHKERRQ(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    CHKERRQ(MatDestroy(&Msub));

    CHKERRQ(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    CHKERRQ(ISDestroy(&iscol_sub));

    CHKERRQ(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    CHKERRQ(ISDestroy(&iscmap));

    if (iscol_local) {
      CHKERRQ(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      CHKERRQ(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}
3665 
/*
    Not scalable: it makes two copies of the submatrix, first a sequential SeqAIJ
  on each process and then the final result obtained by concatenating those local
  matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol containing all the requested column indices.
*/
3673 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3674 {
3675   PetscMPIInt    rank,size;
3676   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3677   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3678   Mat            M,Mreuse;
3679   MatScalar      *aa,*vwork;
3680   MPI_Comm       comm;
3681   Mat_SeqAIJ     *aij;
3682   PetscBool      colflag,allcolumns=PETSC_FALSE;
3683 
3684   PetscFunctionBegin;
3685   CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));
3686   CHKERRMPI(MPI_Comm_rank(comm,&rank));
3687   CHKERRMPI(MPI_Comm_size(comm,&size));
3688 
3689   /* Check for special case: each processor gets entire matrix columns */
3690   CHKERRQ(ISIdentity(iscol,&colflag));
3691   CHKERRQ(ISGetLocalSize(iscol,&n));
3692   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3693   CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3694 
3695   if (call ==  MAT_REUSE_MATRIX) {
3696     CHKERRQ(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
3697     PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3698     CHKERRQ(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
3699   } else {
3700     CHKERRQ(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
3701   }
3702 
3703   /*
3704       m - number of local rows
3705       n - number of columns (same on all processors)
3706       rstart - first row in new global matrix generated
3707   */
3708   CHKERRQ(MatGetSize(Mreuse,&m,&n));
3709   CHKERRQ(MatGetBlockSizes(Mreuse,&bs,&cbs));
3710   if (call == MAT_INITIAL_MATRIX) {
3711     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3712     ii  = aij->i;
3713     jj  = aij->j;
3714 
3715     /*
3716         Determine the number of non-zeros in the diagonal and off-diagonal
3717         portions of the matrix in order to do correct preallocation
3718     */
3719 
3720     /* first get start and end of "diagonal" columns */
3721     if (csize == PETSC_DECIDE) {
3722       CHKERRQ(ISGetSize(isrow,&mglobal));
3723       if (mglobal == n) { /* square matrix */
3724         nlocal = m;
3725       } else {
3726         nlocal = n/size + ((n % size) > rank);
3727       }
3728     } else {
3729       nlocal = csize;
3730     }
3731     CHKERRMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3732     rstart = rend - nlocal;
3733     PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3734 
3735     /* next, compute all the lengths */
3736     CHKERRQ(PetscMalloc1(2*m+1,&dlens));
3737     olens = dlens + m;
3738     for (i=0; i<m; i++) {
3739       jend = ii[i+1] - ii[i];
3740       olen = 0;
3741       dlen = 0;
3742       for (j=0; j<jend; j++) {
3743         if (*jj < rstart || *jj >= rend) olen++;
3744         else dlen++;
3745         jj++;
3746       }
3747       olens[i] = olen;
3748       dlens[i] = dlen;
3749     }
3750     CHKERRQ(MatCreate(comm,&M));
3751     CHKERRQ(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
3752     CHKERRQ(MatSetBlockSizes(M,bs,cbs));
3753     CHKERRQ(MatSetType(M,((PetscObject)mat)->type_name));
3754     CHKERRQ(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3755     CHKERRQ(PetscFree(dlens));
3756   } else {
3757     PetscInt ml,nl;
3758 
3759     M    = *newmat;
3760     CHKERRQ(MatGetLocalSize(M,&ml,&nl));
3761     PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3762     CHKERRQ(MatZeroEntries(M));
3763     /*
3764          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3765        rather than the slower MatSetValues().
3766     */
3767     M->was_assembled = PETSC_TRUE;
3768     M->assembled     = PETSC_FALSE;
3769   }
3770   CHKERRQ(MatGetOwnershipRange(M,&rstart,&rend));
3771   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3772   ii   = aij->i;
3773   jj   = aij->j;
3774 
3775   /* trigger copy to CPU if needed */
3776   CHKERRQ(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
3777   for (i=0; i<m; i++) {
3778     row   = rstart + i;
3779     nz    = ii[i+1] - ii[i];
3780     cwork = jj; jj += nz;
3781     vwork = aa; aa += nz;
3782     CHKERRQ(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
3783   }
3784   CHKERRQ(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));
3785 
3786   CHKERRQ(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3787   CHKERRQ(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3788   *newmat = M;
3789 
3790   /* save submatrix used in processor for next request */
3791   if (call ==  MAT_INITIAL_MATRIX) {
3792     CHKERRQ(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
3793     CHKERRQ(MatDestroy(&Mreuse));
3794   }
3795   PetscFunctionReturn(0);
3796 }
3797 
3798 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3799 {
3800   PetscInt       m,cstart, cend,j,nnz,i,d;
3801   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3802   const PetscInt *JJ;
3803   PetscBool      nooffprocentries;
3804 
3805   PetscFunctionBegin;
3806   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3807 
3808   CHKERRQ(PetscLayoutSetUp(B->rmap));
3809   CHKERRQ(PetscLayoutSetUp(B->cmap));
3810   m      = B->rmap->n;
3811   cstart = B->cmap->rstart;
3812   cend   = B->cmap->rend;
3813   rstart = B->rmap->rstart;
3814 
3815   CHKERRQ(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3816 
3817   if (PetscDefined(USE_DEBUG)) {
3818     for (i=0; i<m; i++) {
3819       nnz = Ii[i+1]- Ii[i];
3820       JJ  = J + Ii[i];
3821       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3822       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3823       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3824     }
3825   }
3826 
3827   for (i=0; i<m; i++) {
3828     nnz     = Ii[i+1]- Ii[i];
3829     JJ      = J + Ii[i];
3830     nnz_max = PetscMax(nnz_max,nnz);
3831     d       = 0;
3832     for (j=0; j<nnz; j++) {
3833       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3834     }
3835     d_nnz[i] = d;
3836     o_nnz[i] = nnz - d;
3837   }
3838   CHKERRQ(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3839   CHKERRQ(PetscFree2(d_nnz,o_nnz));
3840 
3841   for (i=0; i<m; i++) {
3842     ii   = i + rstart;
3843     CHKERRQ(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3844   }
3845   nooffprocentries    = B->nooffprocentries;
3846   B->nooffprocentries = PETSC_TRUE;
3847   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3848   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3849   B->nooffprocentries = nooffprocentries;
3850 
3851   CHKERRQ(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3852   PetscFunctionReturn(0);
3853 }
3854 
3855 /*@
3856    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3857    (the default parallel PETSc format).
3858 
3859    Collective
3860 
3861    Input Parameters:
3862 +  B - the matrix
3863 .  i - the indices into j for the start of each local row (starts with zero)
3864 .  j - the column indices for each local row (starts with zero)
3865 -  v - optional values in the matrix
3866 
3867    Level: developer
3868 
3869    Notes:
3870        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3875 
3876        The format which is used for the sparse matrix input, is equivalent to a
3877     row-major ordering.. i.e for the following matrix, the input data expected is
3878     as shown
3879 
3880 $        1 0 0
3881 $        2 0 3     P0
3882 $       -------
3883 $        4 5 6     P1
3884 $
3885 $     Process0 [P0]: rows_owned=[0,1]
3886 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3887 $        j =  {0,0,2}  [size = 3]
3888 $        v =  {1,2,3}  [size = 3]
3889 $
3890 $     Process1 [P1]: rows_owned=[2]
3891 $        i =  {0,3}    [size = nrow+1  = 1+1]
3892 $        j =  {0,1,2}  [size = 3]
3893 $        v =  {4,5,6}  [size = 3]
3894 
3895 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3896           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3897 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     a no-op if the matrix type does not provide the method */
  CHKERRQ(PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)));
  PetscFunctionReturn(0);
}
3904 
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective
3913 
3914    Input Parameters:
3915 +  B - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The AIJ format (also called the Yale sparse matrix format or
3935    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3936    storage.  The stored row and column indices begin with zero.
3937    See Users-Manual: ch_mat for details.
3938 
3939    The parallel matrix is partitioned such that the first m0 rows belong to
3940    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3941    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3942 
3943    The DIAGONAL portion of the local submatrix of a processor can be defined
3944    as the submatrix which is obtained by extracting the part corresponding to
3945    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3946    first row that belongs to the processor, r2 is the last row belonging to
3947    this processor, and c1-c2 is the range of indices of the local part of a
3948    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3949    common case of a square matrix, the row and column ranges are the same and
3950    the DIAGONAL part is also square. The remaining portion of the local
3951    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3952 
3953    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3954 
3955    You can call MatGetInfo() to get information on how effective the preallocation was;
3956    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3957    You can also run with the option -info and look for messages with the string
3958    malloc in them to see if additional memory allocation was needed.
3959 
3960    Example usage:
3961 
3962    Consider the following 8x8 matrix with 34 non-zero values, that is
3963    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3964    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3965    as follows:
3966 
3967 .vb
3968             1  2  0  |  0  3  0  |  0  4
3969     Proc0   0  5  6  |  7  0  0  |  8  0
3970             9  0 10  | 11  0  0  | 12  0
3971     -------------------------------------
3972            13  0 14  | 15 16 17  |  0  0
3973     Proc1   0 18  0  | 19 20 21  |  0  0
3974             0  0  0  | 22 23  0  | 24  0
3975     -------------------------------------
3976     Proc2  25 26 27  |  0  0 28  | 29  0
3977            30  0  0  | 31 32 33  |  0 34
3978 .ve
3979 
3980    This can be represented as a collection of submatrices as:
3981 
3982 .vb
3983       A B C
3984       D E F
3985       G H I
3986 .ve
3987 
3988    Where the submatrices A,B,C are owned by proc0, D,E,F are
3989    owned by proc1, G,H,I are owned by proc2.
3990 
3991    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'M','N' parameters are 8,8, and have the same values on all procs.
3994 
3995    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3996    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3997    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3998    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3999    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4000    matrix, and [DF] as another SeqAIJ matrix.
4001 
4002    When d_nz, o_nz parameters are specified, d_nz storage elements are
4003    allocated for every row of the local diagonal submatrix, and o_nz
4004    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4005    One way to choose d_nz and o_nz is to use the max nonzeros per local
4006    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4007    In this case, the values of d_nz,o_nz are:
4008 .vb
4009      proc0 : dnz = 2, o_nz = 2
4010      proc1 : dnz = 3, o_nz = 2
4011      proc2 : dnz = 1, o_nz = 4
4012 .ve
4013    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4014    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4015    for proc2. i.e we are using 12+15+10=37 storage locations to store
4016    34 values.
4017 
4018    When d_nnz, o_nnz parameters are specified, the storage is specified
4019    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4020    In the above case the values for d_nnz,o_nnz are:
4021 .vb
4022      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4023      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4024      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4025 .ve
4026    Here the space allocated is sum of all the above values i.e 34, and
4027    hence pre-allocation is perfect.
4028 
4029    Level: intermediate
4030 
4031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4032           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4033 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; a no-op for matrix types
     that do not register "MatMPIAIJSetPreallocation_C" */
  CHKERRQ(PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)));
  PetscFunctionReturn(0);
}
4042 
4043 /*@
4044      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4045          CSR format for the local rows.
4046 
4047    Collective
4048 
4049    Input Parameters:
4050 +  comm - MPI communicator
4051 .  m - number of local rows (Cannot be PETSC_DECIDE)
4052 .  n - This value should be the same as the local size used in creating the
4053        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4054        calculated if N is given) For square matrices n is almost always m.
4055 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4056 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4057 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4058 .   j - column indices
4059 -   a - matrix values
4060 
4061    Output Parameter:
4062 .   mat - the matrix
4063 
4064    Level: intermediate
4065 
4066    Notes:
4067        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4068      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4069      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4070 
4071        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4072 
4073        The format which is used for the sparse matrix input, is equivalent to a
4074     row-major ordering.. i.e for the following matrix, the input data expected is
4075     as shown
4076 
4077        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4078 
4079 $        1 0 0
4080 $        2 0 3     P0
4081 $       -------
4082 $        4 5 6     P1
4083 $
4084 $     Process0 [P0]: rows_owned=[0,1]
4085 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4086 $        j =  {0,0,2}  [size = 3]
4087 $        v =  {1,2,3}  [size = 3]
4088 $
4089 $     Process1 [P1]: rows_owned=[2]
4090 $        i =  {0,3}    [size = nrow+1  = 1+1]
4091 $        j =  {0,1,2}  [size = 3]
4092 $        v =  {4,5,6}  [size = 3]
4093 
4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4095           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4096 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* validate the CSR row pointer and the (mandatory) local row count before creating anything */
  PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  CHKERRQ(MatCreate(comm,mat));
  CHKERRQ(MatSetSizes(*mat,m,n,M,N));
  /* CHKERRQ(MatSetBlockSizes(M,bs,cbs)); */
  CHKERRQ(MatSetType(*mat,MATMPIAIJ));
  /* copies i/j/a into the matrix; the caller keeps ownership of the arrays */
  CHKERRQ(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}
4109 
4110 /*@
4111      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4112          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4113 
4114    Collective
4115 
4116    Input Parameters:
4117 +  mat - the matrix
4118 .  m - number of local rows (Cannot be PETSC_DECIDE)
4119 .  n - This value should be the same as the local size used in creating the
4120        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4121        calculated if N is given) For square matrices n is almost always m.
4122 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4123 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4124 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4125 .  J - column indices
4126 -  v - matrix values
4127 
4128    Level: intermediate
4129 
4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4131           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4132 @*/
4133 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4134 {
4135   PetscInt       cstart,nnz,i,j;
4136   PetscInt       *ld;
4137   PetscBool      nooffprocentries;
4138   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4139   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4140   PetscScalar    *ad,*ao;
4141   const PetscInt *Adi = Ad->i;
4142   PetscInt       ldi,Iii,md;
4143 
4144   PetscFunctionBegin;
4145   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4146   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4147   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4148   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4149 
4150   CHKERRQ(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4151   CHKERRQ(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4152   cstart = mat->cmap->rstart;
4153   if (!Aij->ld) {
4154     /* count number of entries below block diagonal */
4155     CHKERRQ(PetscCalloc1(m,&ld));
4156     Aij->ld = ld;
4157     for (i=0; i<m; i++) {
4158       nnz  = Ii[i+1]- Ii[i];
4159       j     = 0;
4160       while  (J[j] < cstart && j < nnz) {j++;}
4161       J    += nnz;
4162       ld[i] = j;
4163     }
4164   } else {
4165     ld = Aij->ld;
4166   }
4167 
4168   for (i=0; i<m; i++) {
4169     nnz  = Ii[i+1]- Ii[i];
4170     Iii  = Ii[i];
4171     ldi  = ld[i];
4172     md   = Adi[i+1]-Adi[i];
4173     CHKERRQ(PetscArraycpy(ao,v + Iii,ldi));
4174     CHKERRQ(PetscArraycpy(ad,v + Iii + ldi,md));
4175     CHKERRQ(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4176     ad  += md;
4177     ao  += nnz - md;
4178   }
4179   nooffprocentries      = mat->nooffprocentries;
4180   mat->nooffprocentries = PETSC_TRUE;
4181   CHKERRQ(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4182   CHKERRQ(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4183   CHKERRQ(PetscObjectStateIncrease((PetscObject)Aij->A));
4184   CHKERRQ(PetscObjectStateIncrease((PetscObject)Aij->B));
4185   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
4186   CHKERRQ(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4187   CHKERRQ(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4188   mat->nooffprocentries = nooffprocentries;
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 /*@C
4193    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4194    (the default parallel PETSc format).  For good matrix assembly performance
4195    the user should preallocate the matrix storage by setting the parameters
4196    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4197    performance can be increased by more than a factor of 50.
4198 
4199    Collective
4200 
4201    Input Parameters:
4202 +  comm - MPI communicator
4203 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4204            This value should be the same as the local size used in creating the
4205            y vector for the matrix-vector product y = Ax.
4206 .  n - This value should be the same as the local size used in creating the
4207        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4208        calculated if N is given) For square matrices n is almost always m.
4209 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4210 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4211 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4212            (same value is used for all local rows)
4213 .  d_nnz - array containing the number of nonzeros in the various rows of the
4214            DIAGONAL portion of the local submatrix (possibly different for each row)
4215            or NULL, if d_nz is used to specify the nonzero structure.
4216            The size of this array is equal to the number of local rows, i.e 'm'.
4217 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4218            submatrix (same value is used for all local rows).
4219 -  o_nnz - array containing the number of nonzeros in the various rows of the
4220            OFF-DIAGONAL portion of the local submatrix (possibly different for
4221            each row) or NULL, if o_nz is used to specify the nonzero
4222            structure. The size of this array is equal to the number
4223            of local rows, i.e 'm'.
4224 
4225    Output Parameter:
4226 .  A - the matrix
4227 
4228    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4229    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4230    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4231 
4232    Notes:
4233    If the *_nnz parameter is given then the *_nz parameter is ignored
4234 
4235    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4236    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4237    storage requirements for this matrix.
4238 
4239    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4240    processor than it must be used on all processors that share the object for
4241    that argument.
4242 
4243    The user MUST specify either the local or global matrix dimensions
4244    (possibly both).
4245 
4246    The parallel matrix is partitioned across processors such that the
4247    first m0 rows belong to process 0, the next m1 rows belong to
4248    process 1, the next m2 rows belong to process 2 etc.. where
4249    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4250    values corresponding to [m x N] submatrix.
4251 
4252    The columns are logically partitioned with the n0 columns belonging
4253    to 0th partition, the next n1 columns belonging to the next
4254    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4255 
4256    The DIAGONAL portion of the local submatrix on any given processor
4257    is the submatrix corresponding to the rows and columns m,n
4258    corresponding to the given processor. i.e diagonal matrix on
4259    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4260    etc. The remaining portion of the local submatrix [m x (N-n)]
4261    constitute the OFF-DIAGONAL portion. The example below better
4262    illustrates this concept.
4263 
4264    For a square global matrix we define each processor's diagonal portion
4265    to be its local rows and the corresponding columns (a square submatrix);
4266    each processor's off-diagonal portion encompasses the remainder of the
4267    local matrix (a rectangular submatrix).
4268 
4269    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4270 
4271    When calling this routine with a single process communicator, a matrix of
4272    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4273    type of communicator, use the construction mechanism
4274 .vb
4275      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4276 .ve
4277 
4278 $     MatCreate(...,&A);
4279 $     MatSetType(A,MATMPIAIJ);
4280 $     MatSetSizes(A, m,n,M,N);
4281 $     MatMPIAIJSetPreallocation(A,...);
4282 
4283    By default, this format uses inodes (identical nodes) when possible.
4284    We search for consecutive rows with the same nonzero structure, thereby
4285    reusing matrix information to achieve increased efficiency.
4286 
4287    Options Database Keys:
4288 +  -mat_no_inode  - Do not use inodes
4289 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4290 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4291         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4292         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4293 
4294    Example usage:
4295 
4296    Consider the following 8x8 matrix with 34 non-zero values, that is
4297    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4298    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4299    as follows
4300 
4301 .vb
4302             1  2  0  |  0  3  0  |  0  4
4303     Proc0   0  5  6  |  7  0  0  |  8  0
4304             9  0 10  | 11  0  0  | 12  0
4305     -------------------------------------
4306            13  0 14  | 15 16 17  |  0  0
4307     Proc1   0 18  0  | 19 20 21  |  0  0
4308             0  0  0  | 22 23  0  | 24  0
4309     -------------------------------------
4310     Proc2  25 26 27  |  0  0 28  | 29  0
4311            30  0  0  | 31 32 33  |  0 34
4312 .ve
4313 
4314    This can be represented as a collection of submatrices as
4315 
4316 .vb
4317       A B C
4318       D E F
4319       G H I
4320 .ve
4321 
4322    Where the submatrices A,B,C are owned by proc0, D,E,F are
4323    owned by proc1, G,H,I are owned by proc2.
4324 
4325    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4326    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4327    The 'M','N' parameters are 8,8, and have the same values on all procs.
4328 
4329    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4330    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4331    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4332    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4333    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4334    matrix, and [DF] as another SeqAIJ matrix.
4335 
4336    When d_nz, o_nz parameters are specified, d_nz storage elements are
4337    allocated for every row of the local diagonal submatrix, and o_nz
4338    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4339    One way to choose d_nz and o_nz is to use the max nonzeros per local
4340    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4341    In this case, the values of d_nz,o_nz are
4342 .vb
4343      proc0 : dnz = 2, o_nz = 2
4344      proc1 : dnz = 3, o_nz = 2
4345      proc2 : dnz = 1, o_nz = 4
4346 .ve
4347    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4348    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4349    for proc2. i.e we are using 12+15+10=37 storage locations to store
4350    34 values.
4351 
4352    When d_nnz, o_nnz parameters are specified, the storage is specified
4353    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4354    In the above case the values for d_nnz,o_nnz are
4355 .vb
4356      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4357      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4358      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4359 .ve
4360    Here the space allocated is sum of all the above values i.e 34, and
4361    hence pre-allocation is perfect.
4362 
4363    Level: intermediate
4364 
4365 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4366           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4367 @*/
4368 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4369 {
4370   PetscMPIInt    size;
4371 
4372   PetscFunctionBegin;
4373   CHKERRQ(MatCreate(comm,A));
4374   CHKERRQ(MatSetSizes(*A,m,n,M,N));
4375   CHKERRMPI(MPI_Comm_size(comm,&size));
4376   if (size > 1) {
4377     CHKERRQ(MatSetType(*A,MATMPIAIJ));
4378     CHKERRQ(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4379   } else {
4380     CHKERRQ(MatSetType(*A,MATSEQAIJ));
4381     CHKERRQ(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4382   }
4383   PetscFunctionReturn(0);
4384 }
4385 
4386 /*@C
4387   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4388 
4389   Not collective
4390 
4391   Input Parameter:
4392 . A - The MPIAIJ matrix
4393 
4394   Output Parameters:
4395 + Ad - The local diagonal block as a SeqAIJ matrix
4396 . Ao - The local off-diagonal block as a SeqAIJ matrix
4397 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4398 
4399   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4401   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4402   local column numbers to global column numbers in the original matrix.
4403 
4404   Level: intermediate
4405 
4406 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4407 @*/
4408 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4409 {
4410   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4411   PetscBool      flg;
4412 
4413   PetscFunctionBegin;
4414   CHKERRQ(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4415   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4416   if (Ad)     *Ad     = a->A;
4417   if (Ao)     *Ao     = a->B;
4418   if (colmap) *colmap = a->garray;
4419   PetscFunctionReturn(0);
4420 }
4421 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Stacks each rank's sequential matrix inmat
   (m local rows) on top of the others, in rank order, to form one parallel matrix on comm.

   Collective.

   Input:
+  comm   - communicator the output matrix will live on
.  inmat  - this rank's sequential matrix
.  n      - number of local columns for this rank (or PETSC_DECIDE)
-  scall  - MAT_INITIAL_MATRIX (create *outmat) or MAT_REUSE_MATRIX (refill existing *outmat)

   Output:
.  outmat - the assembled parallel matrix
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  CHKERRQ(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      CHKERRQ(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    CHKERRMPI(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* Exclusive prefix sum of local row counts: global index of this rank's first row */
    CHKERRMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* Count diagonal/off-diagonal nonzeros per row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      CHKERRQ(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      CHKERRQ(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      CHKERRQ(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    CHKERRQ(MatCreate(comm,outmat));
    CHKERRQ(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    CHKERRQ(MatGetBlockSizes(inmat,&bs,&cbs));
    CHKERRQ(MatSetBlockSizes(*outmat,bs,cbs));
    CHKERRQ(MatGetRootType_Private(inmat,&rootType));
    CHKERRQ(MatSetType(*outmat,rootType));
    /* Both preallocation routines are called; the one not matching the actual type is a no-op */
    CHKERRQ(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    CHKERRQ(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    /* Every entry set below is locally owned, so skip the off-process communication machinery */
    CHKERRQ(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy this rank's rows of inmat into rows [rstart, rstart+m) of *outmat */
  CHKERRQ(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    CHKERRQ(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    CHKERRQ(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    CHKERRQ(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  CHKERRQ(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4476 
4477 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4478 {
4479   PetscMPIInt       rank;
4480   PetscInt          m,N,i,rstart,nnz;
4481   size_t            len;
4482   const PetscInt    *indx;
4483   PetscViewer       out;
4484   char              *name;
4485   Mat               B;
4486   const PetscScalar *values;
4487 
4488   PetscFunctionBegin;
4489   CHKERRQ(MatGetLocalSize(A,&m,NULL));
4490   CHKERRQ(MatGetSize(A,NULL,&N));
4491   /* Should this be the type of the diagonal block of A? */
4492   CHKERRQ(MatCreate(PETSC_COMM_SELF,&B));
4493   CHKERRQ(MatSetSizes(B,m,N,m,N));
4494   CHKERRQ(MatSetBlockSizesFromMats(B,A,A));
4495   CHKERRQ(MatSetType(B,MATSEQAIJ));
4496   CHKERRQ(MatSeqAIJSetPreallocation(B,0,NULL));
4497   CHKERRQ(MatGetOwnershipRange(A,&rstart,NULL));
4498   for (i=0; i<m; i++) {
4499     CHKERRQ(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4500     CHKERRQ(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4501     CHKERRQ(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4502   }
4503   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4504   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4505 
4506   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4507   CHKERRQ(PetscStrlen(outfile,&len));
4508   CHKERRQ(PetscMalloc1(len+6,&name));
4509   CHKERRQ(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4510   CHKERRQ(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4511   CHKERRQ(PetscFree(name));
4512   CHKERRQ(MatView(B,out));
4513   CHKERRQ(PetscViewerDestroy(&out));
4514   CHKERRQ(MatDestroy(&B));
4515   PetscFunctionReturn(0);
4516 }
4517 
4518 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4519 {
4520   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4521 
4522   PetscFunctionBegin;
4523   if (!merge) PetscFunctionReturn(0);
4524   CHKERRQ(PetscFree(merge->id_r));
4525   CHKERRQ(PetscFree(merge->len_s));
4526   CHKERRQ(PetscFree(merge->len_r));
4527   CHKERRQ(PetscFree(merge->bi));
4528   CHKERRQ(PetscFree(merge->bj));
4529   CHKERRQ(PetscFree(merge->buf_ri[0]));
4530   CHKERRQ(PetscFree(merge->buf_ri));
4531   CHKERRQ(PetscFree(merge->buf_rj[0]));
4532   CHKERRQ(PetscFree(merge->buf_rj));
4533   CHKERRQ(PetscFree(merge->coi));
4534   CHKERRQ(PetscFree(merge->coj));
4535   CHKERRQ(PetscFree(merge->owners_co));
4536   CHKERRQ(PetscLayoutDestroy(&merge->rowmap));
4537   CHKERRQ(PetscFree(merge));
4538   PetscFunctionReturn(0);
4539 }
4540 
4541 #include <../src/mat/utils/freespace.h>
4542 #include <petscbt.h>
4543 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the
   values of mpimat (whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic()) by
   summing the contributions of each rank's sequential matrix seqmat. Rows of seqmat owned
   by other ranks are shipped to their owners and merged there.

   Collective on the communicator of mpimat.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)mpimat,&comm));
  CHKERRQ(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  CHKERRMPI(MPI_Comm_size(comm,&size));
  CHKERRMPI(MPI_Comm_rank(comm,&rank));

  /* Retrieve the merge metadata the symbolic phase attached to mpimat */
  CHKERRQ(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  CHKERRQ(PetscContainerGetPointer(container,(void**)&merge));
  CHKERRQ(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  CHKERRQ(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  CHKERRQ(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  CHKERRQ(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  CHKERRQ(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values of all rows destined for [proc] are contiguous in aa, starting at ai[owners[proc]] */
    i    = owners[proc];
    CHKERRMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) CHKERRMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) CHKERRMPI(MPI_Waitall(merge->nsend,s_waits,status));
  CHKERRQ(PetscFree(status));

  CHKERRQ(PetscFree(s_waits));
  CHKERRQ(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  CHKERRQ(PetscMalloc1(N,&ba_i));  /* scratch row of values, sized for the widest possible row */
  CHKERRQ(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  /* set up a cursor into each received i-structure; layout: [nrows, row indices..., row offsets...] */
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;   /* global row index of local row i */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    CHKERRQ(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both bj_i and aj are sorted, so a single merge pass matches columns */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    CHKERRQ(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  CHKERRQ(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  CHKERRQ(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* release receive buffers (the payload hangs off entry 0, as allocated by PetscPostIrecvScalar) */
  CHKERRQ(PetscFree(abuf_r[0]));
  CHKERRQ(PetscFree(abuf_r));
  CHKERRQ(PetscFree(ba_i));
  CHKERRQ(PetscFree3(buf_ri_k,nextrow,nextai));
  CHKERRQ(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4663 
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines
   the nonzero structure of the parallel sum of the per-rank sequential matrices and creates
   the (unassembled) parallel matrix *mpimat. The merge metadata needed by the numeric phase
   is stored in a Mat_Merge_SeqsToMPI struct attached to *mpimat in a PetscContainer under
   the key "MatMergeSeqsToMPI".

   Collective on comm.
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  CHKERRQ(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  CHKERRQ(PetscCommDuplicate(comm,&comm,NULL));
  CHKERRMPI(MPI_Comm_size(comm,&size));
  CHKERRMPI(MPI_Comm_rank(comm,&rank));

  CHKERRQ(PetscNew(&merge));
  CHKERRQ(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  CHKERRQ(PetscLayoutCreate(comm,&merge->rowmap));
  CHKERRQ(PetscLayoutSetLocalSize(merge->rowmap,m));
  CHKERRQ(PetscLayoutSetSize(merge->rowmap,M));
  CHKERRQ(PetscLayoutSetBlockSize(merge->rowmap,1));
  CHKERRQ(PetscLayoutSetUp(merge->rowmap));
  CHKERRQ(PetscMalloc1(size,&len_si));
  CHKERRQ(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;      /* local row count (now resolved if m was PETSC_DECIDE) */
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* rows this rank owns are merged locally, not sent */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* count nonempty rows destined for [proc]; only those appear in the i-structure */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      /* i-structure message: count + nrows row indices + (nrows+1) offsets = 2*(nrows+1) entries */
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  CHKERRQ(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  CHKERRQ(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  CHKERRQ(PetscCommGetNewTag(comm,&tagj));
  CHKERRQ(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  CHKERRQ(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of all rows destined for [proc] are contiguous in aj */
    i    = owners[proc];
    CHKERRMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) CHKERRMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) CHKERRMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  CHKERRQ(PetscCommGetNewTag(comm,&tagi));
  CHKERRQ(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  CHKERRQ(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    CHKERRMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) CHKERRMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) CHKERRMPI(MPI_Waitall(merge->nsend,si_waits,status));

  CHKERRQ(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    CHKERRQ(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  /* the communication scaffolding for the symbolic exchange is no longer needed */
  CHKERRQ(PetscFree(len_si));
  CHKERRQ(PetscFree(len_ri));
  CHKERRQ(PetscFree(rj_waits));
  CHKERRQ(PetscFree2(si_waits,sj_waits));
  CHKERRQ(PetscFree(ri_waits));
  CHKERRQ(PetscFree(buf_s));
  CHKERRQ(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  CHKERRQ(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  CHKERRQ(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  CHKERRQ(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  CHKERRQ(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  /* set up a cursor into each received i-structure */
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    CHKERRQ(PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        CHKERRQ(PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      CHKERRQ(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    CHKERRQ(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    CHKERRQ(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  CHKERRQ(PetscFree3(buf_ri_k,nextrow,nextai));

  /* flatten the free-space chunks into the final column-index array bj */
  CHKERRQ(PetscMalloc1(bi[m]+1,&bj));
  CHKERRQ(PetscFreeSpaceContiguous(&free_space,bj));
  CHKERRQ(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  CHKERRQ(MatGetBlockSizes(seqmat,&bs,&cbs));
  CHKERRQ(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    CHKERRQ(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    CHKERRQ(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  CHKERRQ(MatSetBlockSizes(B_mpi,bs,cbs));
  CHKERRQ(MatSetType(B_mpi,MATMPIAIJ));
  CHKERRQ(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  CHKERRQ(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  CHKERRQ(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  CHKERRQ(PetscContainerCreate(PETSC_COMM_SELF,&container));
  CHKERRQ(PetscContainerSetPointer(container,merge));
  CHKERRQ(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  CHKERRQ(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  CHKERRQ(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  CHKERRQ(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4914 
4915 /*@C
4916       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4917                  matrices from each processor
4918 
4919     Collective
4920 
4921    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4924 .    m - number of local rows (or PETSC_DECIDE)
4925 .    n - number of local columns (or PETSC_DECIDE)
4926 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4927 
4928    Output Parameter:
4929 .    mpimat - the parallel matrix generated
4930 
4931     Level: advanced
4932 
4933    Notes:
4934      The dimensions of the sequential matrix in each processor MUST be the same.
4935      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4936      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4937 @*/
4938 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4939 {
4940   PetscMPIInt    size;
4941 
4942   PetscFunctionBegin;
4943   CHKERRMPI(MPI_Comm_size(comm,&size));
4944   if (size == 1) {
4945     CHKERRQ(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4946     if (scall == MAT_INITIAL_MATRIX) {
4947       CHKERRQ(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4948     } else {
4949       CHKERRQ(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4950     }
4951     CHKERRQ(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4952     PetscFunctionReturn(0);
4953   }
4954   CHKERRQ(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4955   if (scall == MAT_INITIAL_MATRIX) {
4956     CHKERRQ(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4957   }
4958   CHKERRQ(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4959   CHKERRQ(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4960   PetscFunctionReturn(0);
4961 }
4962 
4963 /*@
4964      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4966           with MatGetSize()
4967 
4968     Not Collective
4969 
4970    Input Parameters:
4971 +    A - the matrix
4972 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4973 
4974    Output Parameter:
4975 .    A_loc - the local sequential matrix generated
4976 
4977     Level: developer
4978 
4979    Notes:
4980      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4981      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4982      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4983      modify the values of the returned A_loc.
4984 
4985 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
4986 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* Accept MATMPIAIJ and its subclasses (type name begins with "mpiaij") */
  CHKERRQ(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block already IS the local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      CHKERRQ(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      CHKERRQ(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  CHKERRQ(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  CHKERRQ(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  CHKERRQ(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Build new CSR arrays: each row has the diag nonzeros plus the off-diag nonzeros */
    CHKERRQ(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    CHKERRQ(PetscMalloc1(1+ci[am],&cj));
    CHKERRQ(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    /* Merge each row keeping global column order: off-diag columns < cstart, then the
       diagonal block (columns cstart..), then the remaining off-diag columns */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];  /* translate local off-diag column to its global index */
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure already exists: only refresh the values, walking rows in the same order
       (leading off-diag, diag, trailing off-diag) as the initial build above */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    CHKERRQ(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    CHKERRQ(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  CHKERRQ(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  CHKERRQ(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  CHKERRQ(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5087 
5088 /*@
5089      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and offdiagonal part
5091 
5092     Not Collective
5093 
5094    Input Parameters:
5095 +    A - the matrix
5096 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5097 
5098    Output Parameters:
5099 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5100 -    A_loc - the local sequential matrix generated
5101 
5102     Level: developer
5103 
5104    Notes:
5105      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5106 
5107 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5108 
5109 @*/
5110 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5111 {
5112   Mat            Ao,Ad;
5113   const PetscInt *cmap;
5114   PetscMPIInt    size;
5115   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5116 
5117   PetscFunctionBegin;
5118   CHKERRQ(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5119   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5120   if (size == 1) {
5121     if (scall == MAT_INITIAL_MATRIX) {
5122       CHKERRQ(PetscObjectReference((PetscObject)Ad));
5123       *A_loc = Ad;
5124     } else if (scall == MAT_REUSE_MATRIX) {
5125       CHKERRQ(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5126     }
5127     if (glob) CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5128     PetscFunctionReturn(0);
5129   }
5130   CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5131   CHKERRQ(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5132   if (f) {
5133     CHKERRQ((*f)(A,scall,glob,A_loc));
5134   } else {
5135     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5136     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5137     Mat_SeqAIJ        *c;
5138     PetscInt          *ai = a->i, *aj = a->j;
5139     PetscInt          *bi = b->i, *bj = b->j;
5140     PetscInt          *ci,*cj;
5141     const PetscScalar *aa,*ba;
5142     PetscScalar       *ca;
5143     PetscInt          i,j,am,dn,on;
5144 
5145     CHKERRQ(MatGetLocalSize(Ad,&am,&dn));
5146     CHKERRQ(MatGetLocalSize(Ao,NULL,&on));
5147     CHKERRQ(MatSeqAIJGetArrayRead(Ad,&aa));
5148     CHKERRQ(MatSeqAIJGetArrayRead(Ao,&ba));
5149     if (scall == MAT_INITIAL_MATRIX) {
5150       PetscInt k;
5151       CHKERRQ(PetscMalloc1(1+am,&ci));
5152       CHKERRQ(PetscMalloc1(ai[am]+bi[am],&cj));
5153       CHKERRQ(PetscMalloc1(ai[am]+bi[am],&ca));
5154       ci[0] = 0;
5155       for (i=0,k=0; i<am; i++) {
5156         const PetscInt ncols_o = bi[i+1] - bi[i];
5157         const PetscInt ncols_d = ai[i+1] - ai[i];
5158         ci[i+1] = ci[i] + ncols_o + ncols_d;
5159         /* diagonal portion of A */
5160         for (j=0; j<ncols_d; j++,k++) {
5161           cj[k] = *aj++;
5162           ca[k] = *aa++;
5163         }
5164         /* off-diagonal portion of A */
5165         for (j=0; j<ncols_o; j++,k++) {
5166           cj[k] = dn + *bj++;
5167           ca[k] = *ba++;
5168         }
5169       }
5170       /* put together the new matrix */
5171       CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5172       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5173       /* Since these are PETSc arrays, change flags to free them as necessary. */
5174       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5175       c->free_a  = PETSC_TRUE;
5176       c->free_ij = PETSC_TRUE;
5177       c->nonew   = 0;
5178       CHKERRQ(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5179     } else if (scall == MAT_REUSE_MATRIX) {
5180       CHKERRQ(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5181       for (i=0; i<am; i++) {
5182         const PetscInt ncols_d = ai[i+1] - ai[i];
5183         const PetscInt ncols_o = bi[i+1] - bi[i];
5184         /* diagonal portion of A */
5185         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5186         /* off-diagonal portion of A */
5187         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5188       }
5189       CHKERRQ(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5190     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5191     CHKERRQ(MatSeqAIJRestoreArrayRead(Ad,&aa));
5192     CHKERRQ(MatSeqAIJRestoreArrayRead(Ao,&aa));
5193     if (glob) {
5194       PetscInt cst, *gidx;
5195 
5196       CHKERRQ(MatGetOwnershipRangeColumn(A,&cst,NULL));
5197       CHKERRQ(PetscMalloc1(dn+on,&gidx));
5198       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5199       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5200       CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5201     }
5202   }
5203   CHKERRQ(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5204   PetscFunctionReturn(0);
5205 }
5206 
5207 /*@C
5208      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5209 
5210     Not Collective
5211 
5212    Input Parameters:
5213 +    A - the matrix
5214 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5215 -    row, col - index sets of rows and columns to extract (or NULL)
5216 
5217    Output Parameter:
5218 .    A_loc - the local sequential matrix generated
5219 
5220     Level: developer
5221 
5222 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5223 
5224 @*/
5225 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5226 {
5227   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5228   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5229   IS             isrowa,iscola;
5230   Mat            *aloc;
5231   PetscBool      match;
5232 
5233   PetscFunctionBegin;
5234   CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5235   PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5236   CHKERRQ(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5237   if (!row) {
5238     start = A->rmap->rstart; end = A->rmap->rend;
5239     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5240   } else {
5241     isrowa = *row;
5242   }
5243   if (!col) {
5244     start = A->cmap->rstart;
5245     cmap  = a->garray;
5246     nzA   = a->A->cmap->n;
5247     nzB   = a->B->cmap->n;
5248     CHKERRQ(PetscMalloc1(nzA+nzB, &idx));
5249     ncols = 0;
5250     for (i=0; i<nzB; i++) {
5251       if (cmap[i] < start) idx[ncols++] = cmap[i];
5252       else break;
5253     }
5254     imark = i;
5255     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5256     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5257     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5258   } else {
5259     iscola = *col;
5260   }
5261   if (scall != MAT_INITIAL_MATRIX) {
5262     CHKERRQ(PetscMalloc1(1,&aloc));
5263     aloc[0] = *A_loc;
5264   }
5265   CHKERRQ(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5266   if (!col) { /* attach global id of condensed columns */
5267     CHKERRQ(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5268   }
5269   *A_loc = aloc[0];
5270   CHKERRQ(PetscFree(aloc));
5271   if (!row) {
5272     CHKERRQ(ISDestroy(&isrowa));
5273   }
5274   if (!col) {
5275     CHKERRQ(ISDestroy(&iscola));
5276   }
5277   CHKERRQ(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5278   PetscFunctionReturn(0);
5279 }
5280 
5281 /*
 * Create a sequential AIJ matrix based on row indices. A whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
5285  * */
/* Gather the (possibly remote) rows of P listed in 'rows' into a new sequential
 * matrix *P_oth using two PetscSF communications (one for the diagonal block,
 * one for the off-diagonal block). The SFs are composed onto *P_oth ("diagsf",
 * "offdiagsf") so later calls can refresh just the values.
 *
 * WARNING: this routine temporarily mutates P's internal column indices
 * (pd->j shifted to global, po->j mapped to global and back), so the exact
 * statement order of the overlapped Begin/End pairs below matters.
 */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  CHKERRQ(MatGetLocalSize(P,&plocalsize,NULL));
  CHKERRQ(ISGetLocalSize(rows,&nrows));
  CHKERRQ(PetscCalloc1(nrows,&iremote));
  CHKERRQ(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    CHKERRQ(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  CHKERRQ(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  CHKERRQ(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  CHKERRQ(PetscSFSetFromOptions(sf));
  CHKERRQ(PetscSFSetUp(sf));

  /* Per root row, record [diag nnz, offdiag nnz] pairs and their running offsets */
  CHKERRQ(PetscCalloc1(2*(plocalsize+1),&roffsets));
  CHKERRQ(PetscCalloc1(2*plocalsize,&nrcols));
  CHKERRQ(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute prefix sums so that we know the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  CHKERRQ(PetscCalloc1(2*nrows,&nlcols));
  CHKERRQ(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  CHKERRQ(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  CHKERRQ(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  CHKERRQ(PetscSFDestroy(&sf));
  CHKERRQ(PetscFree(roffsets));
  CHKERRQ(PetscFree(nrcols));
  /* Total counts of incoming diag/offdiag entries; ncol bounds the row width */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  CHKERRQ(MatSetUp(*P_oth));
  CHKERRQ(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build entry-level SF graphs: one leaf per incoming nonzero. */
  /* diag */
  CHKERRQ(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  CHKERRQ(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  CHKERRQ(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  CHKERRQ(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    CHKERRQ(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  CHKERRQ(ISRestoreIndices(rows,&lrowindices));
  CHKERRQ(PetscFree(loffsets));
  CHKERRQ(PetscFree(nlcols));
  CHKERRQ(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  CHKERRQ(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  CHKERRQ(PetscSFSetFromOptions(sf));
  CHKERRQ(PetscSFSetUp(sf));

  CHKERRQ(PetscSFCreate(comm,&osf));
  /* Off diag */
  CHKERRQ(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  CHKERRQ(PetscSFSetFromOptions(osf));
  CHKERRQ(PetscSFSetUp(osf));
  CHKERRQ(MatSeqAIJGetArrayRead(p->A,&pd_a));
  CHKERRQ(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data to save memory */
  CHKERRQ(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  CHKERRQ(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  CHKERRQ(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (undone below after the Bcast completes) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  CHKERRQ(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  CHKERRQ(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  CHKERRQ(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  CHKERRQ(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  CHKERRQ(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  CHKERRQ(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Map po->j back to local indices; every index must be found (IS_GTOLM_DROP checked below) */
  CHKERRQ(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  CHKERRQ(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  CHKERRQ(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  CHKERRQ(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  CHKERRQ(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  CHKERRQ(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  CHKERRQ(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  CHKERRQ(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  CHKERRQ(PetscSFDestroy(&sf));
  CHKERRQ(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5454 
5455 /*
5456  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5457  * This supports MPIAIJ and MAIJ
5458  * */
/* Build (or refresh) P_oth, the rows of P matching the nonzero off-diagonal
 * columns of local A. 'dof' divides A's column indices down to P's row space
 * (dof > 1 for MAIJ matrices). On MAT_INITIAL_MATRIX the row set and an
 * A-offdiag-to-P_oth column mapping ("aoffdiagtopothmapping") are computed;
 * on MAT_REUSE_MATRIX only the values are updated via the SFs composed onto
 * *P_oth by MatCreateSeqSubMatrixWithRows_Private(). */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    CHKERRQ(PetscHMapICreate(&hamp));
    CHKERRQ(PetscHMapIResize(hamp,a->B->cmap->n));
    CHKERRQ(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      CHKERRQ(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        CHKERRQ(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray sorted) */
        mapping[i] = count-1;
      }
    }
    CHKERRQ(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    CHKERRQ(PetscHMapIGetSize(hamp,&htsize));
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    /* Extract and sort the unique row keys to form the row index set */
    CHKERRQ(PetscCalloc1(htsize,&rowindices));
    off = 0;
    CHKERRQ(PetscHMapIGetKeys(hamp,&off,rowindices));
    CHKERRQ(PetscHMapIDestroy(&hamp));
    CHKERRQ(PetscSortInt(htsize,rowindices));
    CHKERRQ(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    CHKERRQ(MatDestroy(P_oth));
    CHKERRQ(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    CHKERRQ(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    CHKERRQ(ISDestroy(&map));
    CHKERRQ(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    CHKERRQ(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    CHKERRQ(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    CHKERRQ(MatSeqAIJGetArrayRead(p->A,&pd_a));
    CHKERRQ(MatSeqAIJGetArrayRead(p->B,&po_a));
    CHKERRQ(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    CHKERRQ(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    CHKERRQ(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    CHKERRQ(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    CHKERRQ(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    CHKERRQ(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  CHKERRQ(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5532 
5533 /*@C
5534   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5535 
5536   Collective on Mat
5537 
5538   Input Parameters:
5539 + A - the first matrix in mpiaij format
5540 . B - the second matrix in mpiaij format
5541 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5542 
5543   Output Parameters:
5544 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5545 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5546 - B_seq - the sequential matrix generated
5547 
5548   Level: developer
5549 
5550 @*/
5551 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5552 {
5553   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5554   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5555   IS             isrowb,iscolb;
5556   Mat            *bseq=NULL;
5557 
5558   PetscFunctionBegin;
5559   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5560     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5561   }
5562   CHKERRQ(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5563 
5564   if (scall == MAT_INITIAL_MATRIX) {
5565     start = A->cmap->rstart;
5566     cmap  = a->garray;
5567     nzA   = a->A->cmap->n;
5568     nzB   = a->B->cmap->n;
5569     CHKERRQ(PetscMalloc1(nzA+nzB, &idx));
5570     ncols = 0;
5571     for (i=0; i<nzB; i++) {  /* row < local row index */
5572       if (cmap[i] < start) idx[ncols++] = cmap[i];
5573       else break;
5574     }
5575     imark = i;
5576     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5577     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5578     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5579     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5580   } else {
5581     PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5582     isrowb  = *rowb; iscolb = *colb;
5583     CHKERRQ(PetscMalloc1(1,&bseq));
5584     bseq[0] = *B_seq;
5585   }
5586   CHKERRQ(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5587   *B_seq = bseq[0];
5588   CHKERRQ(PetscFree(bseq));
5589   if (!rowb) {
5590     CHKERRQ(ISDestroy(&isrowb));
5591   } else {
5592     *rowb = isrowb;
5593   }
5594   if (!colb) {
5595     CHKERRQ(ISDestroy(&iscolb));
5596   } else {
5597     *colb = iscolb;
5598   }
5599   CHKERRQ(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5600   PetscFunctionReturn(0);
5601 }
5602 
5603 /*
5604     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5605     of the OFF-DIAGONAL portion of local A
5606 
5607     Collective on Mat
5608 
5609    Input Parameters:
5610 +    A,B - the matrices in mpiaij format
5611 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5612 
   Output Parameters:
5614 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5615 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5616 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5617 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5618 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5621 
5622     Level: developer
5623 
5624 */
/* Fetch the rows of B matching the off-diagonal columns of local A into the
 * sequential matrix *B_oth, using the point-to-point pattern of A's Mvctx
 * VecScatter. On MAT_INITIAL_MATRIX the i/j structure is exchanged and the
 * send buffers are (optionally) saved through startsj_s/startsj_r/bufa_ptr;
 * on MAT_REUSE_MATRIX only the values (a-array) are re-communicated. */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs;
  const PetscInt         *srow,*rstarts,*sstarts;
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt            size,tag,rank,nreqs;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRMPI(MPI_Comm_size(comm,&size));

  /* The column layout of A must match the row layout of B */
  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  CHKERRQ(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  CHKERRMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    /* No off-process columns on a single rank; nothing to communicate.
     * NOTE(review): these assignments only clear the local parameter copies,
     * not the caller's pointers (*startsj_s etc.) -- confirm this is intended. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  CHKERRQ(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  CHKERRQ(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  /* One shared request array: the first nrecvs slots are receives, the rest sends */
  CHKERRQ(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  CHKERRQ(PetscMalloc1(nreqs,&reqs));
  rwaits = reqs;
  swaits = reqs + nrecvs;

  /* Without saved buffers we cannot reuse; fall back to a full rebuild */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) CHKERRQ(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      CHKERRMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message: per-row lengths of the rows each neighbor needs */
    CHKERRQ(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      CHKERRQ(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          CHKERRQ(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          CHKERRQ(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      CHKERRMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) CHKERRMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    CHKERRQ(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    CHKERRQ(PetscMalloc1(len+1,&bufj));
    CHKERRQ(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth */
    CHKERRQ(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        CHKERRQ(PetscIntSumError(rowlen[j],len,&len)); /* len += rowlen[j], with overflow check */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    CHKERRQ(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    CHKERRQ(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    CHKERRQ(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      CHKERRMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          CHKERRQ(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          CHKERRQ(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      CHKERRMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) CHKERRMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure unchanged: recover the saved offsets/buffers and only refresh values */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    CHKERRQ(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    CHKERRMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        CHKERRQ(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        CHKERRQ(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
      }
    }
    CHKERRMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) CHKERRMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  CHKERRQ(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    CHKERRQ(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      /* Caller did not ask for the buffers to be saved; release them */
      CHKERRQ(PetscFree2(sstartsj,rstartsj));
      CHKERRQ(PetscFree(bufa_ptr));
    } else {
      /* Save offsets and value buffer so MAT_REUSE_MATRIX can skip the i/j exchange */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    CHKERRQ(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
  }

  CHKERRQ(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
  CHKERRQ(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
  CHKERRQ(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscFunctionReturn(0);
}
5832 
5833 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5834 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5835 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5836 #if defined(PETSC_HAVE_MKL_SPARSE)
5837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5838 #endif
5839 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5841 #if defined(PETSC_HAVE_ELEMENTAL)
5842 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5843 #endif
5844 #if defined(PETSC_HAVE_SCALAPACK)
5845 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5846 #endif
5847 #if defined(PETSC_HAVE_HYPRE)
5848 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5849 #endif
5850 #if defined(PETSC_HAVE_CUDA)
5851 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5852 #endif
5853 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5854 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5855 #endif
5856 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5857 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5858 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5859 
5860 /*
5861     Computes (B'*A')' since computing B*A directly is untenable
5862 
5863                n                       p                          p
5864         [             ]       [             ]         [                 ]
5865       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5866         [             ]       [             ]         [                 ]
5867 
5868 */
5869 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5870 {
5871   Mat            At,Bt,Ct;
5872 
5873   PetscFunctionBegin;
5874   CHKERRQ(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5875   CHKERRQ(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5876   CHKERRQ(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5877   CHKERRQ(MatDestroy(&At));
5878   CHKERRQ(MatDestroy(&Bt));
5879   CHKERRQ(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5880   CHKERRQ(MatDestroy(&Ct));
5881   PetscFunctionReturn(0);
5882 }
5883 
5884 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5885 {
5886   PetscBool      cisdense;
5887 
5888   PetscFunctionBegin;
5889   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5890   CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5891   CHKERRQ(MatSetBlockSizesFromMats(C,A,B));
5892   CHKERRQ(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5893   if (!cisdense) {
5894     CHKERRQ(MatSetType(C,((PetscObject)A)->type_name));
5895   }
5896   CHKERRQ(MatSetUp(C));
5897 
5898   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5899   PetscFunctionReturn(0);
5900 }
5901 
5902 /* ----------------------------------------------------------------*/
5903 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5904 {
5905   Mat_Product *product = C->product;
5906   Mat         A = product->A,B=product->B;
5907 
5908   PetscFunctionBegin;
5909   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5910     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5911 
5912   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5913   C->ops->productsymbolic = MatProductSymbolic_AB;
5914   PetscFunctionReturn(0);
5915 }
5916 
5917 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5918 {
5919   Mat_Product    *product = C->product;
5920 
5921   PetscFunctionBegin;
5922   if (product->type == MATPRODUCT_AB) {
5923     CHKERRQ(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5924   }
5925   PetscFunctionReturn(0);
5926 }
5927 
5928 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5929    is greater than value, or last if there is no such element.
5930 */
5931 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5932 {
5933   PetscCount  it,step,count = last - first;
5934 
5935   PetscFunctionBegin;
5936   while (count > 0) {
5937     it   = first;
5938     step = count / 2;
5939     it  += step;
5940     if (!(value < array[it])) {
5941       first  = ++it;
5942       count -= step + 1;
5943     } else count = step;
5944   }
5945   *upper = first;
5946   PetscFunctionReturn(0);
5947 }
5948 
5949 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
5950 
5951   Input Parameters:
5952 
5953     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5954     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5955 
5956     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
5957 
5958     For Set1, j1[] contains column indices of the nonzeros.
5959     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5960     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
5961     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5962 
5963     Similar for Set2.
5964 
5965     This routine merges the two sets of nonzeros row by row and removes repeats.
5966 
5967   Output Parameters: (memories are allocated by the caller)
5968 
5969     i[],j[]: the CSR of the merged matrix, which has m rows.
5970     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5971     imap2[]: similar to imap1[], but for Set2.
5972     Note we order nonzeros row-by-row and from left to right.
5973 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* r: row index of mat; m: number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  CHKERRQ(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Counters of unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    while (b1 < e1 && b2 < e2) { /* Classic two-way merge while both sets still have entries in this row */
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Skip over the repeats of this entry; land on the next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Skip over the repeats of this entry; land on the next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's entry has the smaller column, take it alone */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Set2's entry has the smaller column, take it alone */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* One set is exhausted; append the remaining unique entries of the other (at most one loop runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: total unique nonzeros up to and including row r */
  }
  PetscFunctionReturn(0);
}
6027 
6028 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6029 
6030   Input Parameters:
6031     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6032     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6033       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6034 
6035       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6036       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6037 
6038   Output Parameters:
6039     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6040     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6041       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6042       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6043 
6044     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6045       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6046         repeats (i.e., same 'i,j' pair).
6047       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6048         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6049 
6050       Atot: number of entries belonging to the diagonal block
6051       Annz: number of unique nonzeros belonging to the diagonal block.
6052 
6053     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6054 
6055     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6056 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  CHKERRQ(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  CHKERRQ(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart; /* Number of local rows */

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows; they mark entries to be ignored */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): the bound below permits j[p] == mat->cmap->N, but valid global column indices are [0, N); confirm whether '<=' should be '<' */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    CHKERRQ(PetscSortIntWithCountArray(s-k,j+k,perm+k)); /* perm[] follows j[] so original entry positions are retained */
    CHKERRQ(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the shifted diagonal indices while scanning repeats */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col); /* Offdiag indices were never shifted, only advance past repeats */
      Bnnz++;
    }
    k = s; /* Move on to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  CHKERRQ(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets for the second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    CHKERRQ(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    CHKERRQ(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* Prefix sum: Ajmap[t+1]-Ajmap[t] = repeat count of the t-th unique diag entry */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); /* Same prefix-sum structure for the offdiag block */
      Bnnz++;
    }
  }
  /* Output; caller frees Aperm,Bperm,Ajmap,Bjmap together with a single PetscFree4() */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6153 
6154 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6155 {
6156   MPI_Comm                  comm;
6157   PetscMPIInt               rank,size;
6158   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6159   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6160   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6161 
6162   PetscFunctionBegin;
6163   CHKERRQ(PetscFree(mpiaij->garray));
6164   CHKERRQ(VecDestroy(&mpiaij->lvec));
6165 #if defined(PETSC_USE_CTABLE)
6166   CHKERRQ(PetscTableDestroy(&mpiaij->colmap));
6167 #else
6168   CHKERRQ(PetscFree(mpiaij->colmap));
6169 #endif
6170   CHKERRQ(VecScatterDestroy(&mpiaij->Mvctx));
6171   mat->assembled = PETSC_FALSE;
6172   mat->was_assembled = PETSC_FALSE;
6173   CHKERRQ(MatResetPreallocationCOO_MPIAIJ(mat));
6174 
6175   CHKERRQ(PetscObjectGetComm((PetscObject)mat,&comm));
6176   CHKERRMPI(MPI_Comm_size(comm,&size));
6177   CHKERRMPI(MPI_Comm_rank(comm,&rank));
6178   CHKERRQ(PetscLayoutSetUp(mat->rmap));
6179   CHKERRQ(PetscLayoutSetUp(mat->cmap));
6180   CHKERRQ(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6181   CHKERRQ(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6182   CHKERRQ(MatGetLocalSize(mat,&m,&n));
6183   CHKERRQ(MatGetSize(mat,&M,&N));
6184 
6185   /* ---------------------------------------------------------------------------*/
6186   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6187   /* entries come first, then local rows, then remote rows.                     */
6188   /* ---------------------------------------------------------------------------*/
6189   PetscCount n1 = coo_n,*perm1;
6190   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6191   CHKERRQ(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6192   CHKERRQ(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6193   CHKERRQ(PetscArraycpy(j1,coo_j,n1));
6194   for (k=0; k<n1; k++) perm1[k] = k;
6195 
6196   /* Manipulate indices so that entries with negative row or col indices will have smallest
6197      row indices, local entries will have greater but negative row indices, and remote entries
6198      will have positive row indices.
6199   */
6200   for (k=0; k<n1; k++) {
6201     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6202     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6203     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6204     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6205   }
6206 
6207   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6208   CHKERRQ(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6209   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6210   CHKERRQ(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6211   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6212 
6213   /* ---------------------------------------------------------------------------*/
6214   /*           Split local rows into diag/offdiag portions                      */
6215   /* ---------------------------------------------------------------------------*/
6216   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6217   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6218   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6219 
6220   CHKERRQ(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6221   CHKERRQ(PetscMalloc1(n1-rem,&Cperm1));
6222   CHKERRQ(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6223 
6224   /* ---------------------------------------------------------------------------*/
6225   /*           Send remote rows to their owner                                  */
6226   /* ---------------------------------------------------------------------------*/
6227   /* Find which rows should be sent to which remote ranks*/
6228   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6229   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6230   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6231   const PetscInt *ranges;
6232   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6233 
6234   CHKERRQ(PetscLayoutGetRanges(mat->rmap,&ranges));
6235   CHKERRQ(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6236   for (k=rem; k<n1;) {
6237     PetscMPIInt  owner;
6238     PetscInt     firstRow,lastRow;
6239 
6240     /* Locate a row range */
6241     firstRow = i1[k]; /* first row of this owner */
6242     CHKERRQ(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6243     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6244 
6245     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6246     CHKERRQ(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6247 
6248     /* All entries in [k,p) belong to this remote owner */
6249     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6250       PetscMPIInt *sendto2;
6251       PetscInt    *nentries2;
6252       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6253 
6254       CHKERRQ(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6255       CHKERRQ(PetscArraycpy(sendto2,sendto,maxNsend));
6256       CHKERRQ(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6257       CHKERRQ(PetscFree2(sendto,nentries2));
6258       sendto      = sendto2;
6259       nentries    = nentries2;
6260       maxNsend    = maxNsend2;
6261     }
6262     sendto[nsend]   = owner;
6263     nentries[nsend] = p - k;
6264     CHKERRQ(PetscCountCast(p-k,&nentries[nsend]));
6265     nsend++;
6266     k = p;
6267   }
6268 
6269   /* Build 1st SF to know offsets on remote to send data */
6270   PetscSF     sf1;
6271   PetscInt    nroots = 1,nroots2 = 0;
6272   PetscInt    nleaves = nsend,nleaves2 = 0;
6273   PetscInt    *offsets;
6274   PetscSFNode *iremote;
6275 
6276   CHKERRQ(PetscSFCreate(comm,&sf1));
6277   CHKERRQ(PetscMalloc1(nsend,&iremote));
6278   CHKERRQ(PetscMalloc1(nsend,&offsets));
6279   for (k=0; k<nsend; k++) {
6280     iremote[k].rank  = sendto[k];
6281     iremote[k].index = 0;
6282     nleaves2        += nentries[k];
6283     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6284   }
6285   CHKERRQ(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6286   CHKERRQ(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6287   CHKERRQ(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6288   CHKERRQ(PetscSFDestroy(&sf1));
6289   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6290 
6291   /* Build 2nd SF to send remote COOs to their owner */
6292   PetscSF sf2;
6293   nroots  = nroots2;
6294   nleaves = nleaves2;
6295   CHKERRQ(PetscSFCreate(comm,&sf2));
6296   CHKERRQ(PetscSFSetFromOptions(sf2));
6297   CHKERRQ(PetscMalloc1(nleaves,&iremote));
6298   p       = 0;
6299   for (k=0; k<nsend; k++) {
6300     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6301     for (q=0; q<nentries[k]; q++,p++) {
6302       iremote[p].rank  = sendto[k];
6303       iremote[p].index = offsets[k] + q;
6304     }
6305   }
6306   CHKERRQ(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6307 
6308   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
6309   CHKERRQ(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6310 
6311   /* Send the remote COOs to their owner */
6312   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6313   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6314   CHKERRQ(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6315   CHKERRQ(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6316   CHKERRQ(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6317   CHKERRQ(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6318   CHKERRQ(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6319 
6320   CHKERRQ(PetscFree(offsets));
6321   CHKERRQ(PetscFree2(sendto,nentries));
6322 
6323   /* ---------------------------------------------------------------*/
6324   /* Sort received COOs by row along with the permutation array     */
6325   /* ---------------------------------------------------------------*/
6326   for (k=0; k<n2; k++) perm2[k] = k;
6327   CHKERRQ(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6328 
6329   /* ---------------------------------------------------------------*/
6330   /* Split received COOs into diag/offdiag portions                 */
6331   /* ---------------------------------------------------------------*/
6332   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6333   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6334   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6335 
6336   CHKERRQ(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6337   CHKERRQ(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6338 
6339   /* --------------------------------------------------------------------------*/
6340   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6341   /* --------------------------------------------------------------------------*/
6342   PetscInt   *Ai,*Bi;
6343   PetscInt   *Aj,*Bj;
6344 
6345   CHKERRQ(PetscMalloc1(m+1,&Ai));
6346   CHKERRQ(PetscMalloc1(m+1,&Bi));
6347   CHKERRQ(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6348   CHKERRQ(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6349 
6350   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6351   CHKERRQ(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2));
6352 
6353   CHKERRQ(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6354   CHKERRQ(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6355   CHKERRQ(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6356   CHKERRQ(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6357   CHKERRQ(PetscFree3(i1,j1,perm1));
6358   CHKERRQ(PetscFree3(i2,j2,perm2));
6359 
6360   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6361   PetscInt Annz = Ai[m];
6362   PetscInt Bnnz = Bi[m];
6363   if (Annz < Annz1 + Annz2) {
6364     PetscInt *Aj_new;
6365     CHKERRQ(PetscMalloc1(Annz,&Aj_new));
6366     CHKERRQ(PetscArraycpy(Aj_new,Aj,Annz));
6367     CHKERRQ(PetscFree(Aj));
6368     Aj   = Aj_new;
6369   }
6370 
6371   if (Bnnz < Bnnz1 + Bnnz2) {
6372     PetscInt *Bj_new;
6373     CHKERRQ(PetscMalloc1(Bnnz,&Bj_new));
6374     CHKERRQ(PetscArraycpy(Bj_new,Bj,Bnnz));
6375     CHKERRQ(PetscFree(Bj));
6376     Bj   = Bj_new;
6377   }
6378 
6379   /* --------------------------------------------------------------------------------*/
6380   /* Create new submatrices for on-process and off-process coupling                  */
6381   /* --------------------------------------------------------------------------------*/
6382   PetscScalar   *Aa,*Ba;
6383   MatType       rtype;
6384   Mat_SeqAIJ    *a,*b;
6385   CHKERRQ(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6386   CHKERRQ(PetscCalloc1(Bnnz,&Ba));
6387   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6388   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6389   CHKERRQ(MatDestroy(&mpiaij->A));
6390   CHKERRQ(MatDestroy(&mpiaij->B));
6391   CHKERRQ(MatGetRootType_Private(mat,&rtype));
6392   CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6393   CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6394   CHKERRQ(MatSetUpMultiply_MPIAIJ(mat));
6395 
6396   a = (Mat_SeqAIJ*)mpiaij->A->data;
6397   b = (Mat_SeqAIJ*)mpiaij->B->data;
6398   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6399   a->free_a       = b->free_a       = PETSC_TRUE;
6400   a->free_ij      = b->free_ij      = PETSC_TRUE;
6401 
6402   /* conversion must happen AFTER multiply setup */
6403   CHKERRQ(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6404   CHKERRQ(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6405   CHKERRQ(VecDestroy(&mpiaij->lvec));
6406   CHKERRQ(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6407   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6408 
6409   mpiaij->coo_n   = coo_n;
6410   mpiaij->coo_sf  = sf2;
6411   mpiaij->sendlen = nleaves;
6412   mpiaij->recvlen = nroots;
6413 
6414   mpiaij->Annz1   = Annz1;
6415   mpiaij->Annz2   = Annz2;
6416   mpiaij->Bnnz1   = Bnnz1;
6417   mpiaij->Bnnz2   = Bnnz2;
6418 
6419   mpiaij->Atot1   = Atot1;
6420   mpiaij->Atot2   = Atot2;
6421   mpiaij->Btot1   = Btot1;
6422   mpiaij->Btot2   = Btot2;
6423 
6424   mpiaij->Aimap1  = Aimap1;
6425   mpiaij->Aimap2  = Aimap2;
6426   mpiaij->Bimap1  = Bimap1;
6427   mpiaij->Bimap2  = Bimap2;
6428 
6429   mpiaij->Ajmap1  = Ajmap1;
6430   mpiaij->Ajmap2  = Ajmap2;
6431   mpiaij->Bjmap1  = Bjmap1;
6432   mpiaij->Bjmap2  = Bjmap2;
6433 
6434   mpiaij->Aperm1  = Aperm1;
6435   mpiaij->Aperm2  = Aperm2;
6436   mpiaij->Bperm1  = Bperm1;
6437   mpiaij->Bperm2  = Bperm2;
6438 
6439   mpiaij->Cperm1  = Cperm1;
6440 
6441   /* Allocate in preallocation. If not used, it has zero cost on host */
6442   CHKERRQ(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6443   PetscFunctionReturn(0);
6444 }
6445 
6446 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6447 {
6448   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6449   Mat                  A = mpiaij->A,B = mpiaij->B;
6450   PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
6451   PetscScalar          *Aa,*Ba;
6452   PetscScalar          *sendbuf = mpiaij->sendbuf;
6453   PetscScalar          *recvbuf = mpiaij->recvbuf;
6454   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
6455   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
6456   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6457   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6458 
6459   PetscFunctionBegin;
6460   CHKERRQ(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6461   CHKERRQ(MatSeqAIJGetArray(B,&Ba));
6462   if (imode == INSERT_VALUES) {
6463     CHKERRQ(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar)));
6464     CHKERRQ(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar)));
6465   }
6466 
6467   /* Pack entries to be sent to remote */
6468   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6469 
6470   /* Send remote entries to their owner and overlap the communication with local computation */
6471   CHKERRQ(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6472   /* Add local entries to A and B */
6473   for (PetscCount i=0; i<Annz1; i++) {
6474     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
6475   }
6476   for (PetscCount i=0; i<Bnnz1; i++) {
6477     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
6478   }
6479   CHKERRQ(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6480 
6481   /* Add received remote entries to A and B */
6482   for (PetscCount i=0; i<Annz2; i++) {
6483     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6484   }
6485   for (PetscCount i=0; i<Bnnz2; i++) {
6486     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6487   }
6488   CHKERRQ(MatSeqAIJRestoreArray(A,&Aa));
6489   CHKERRQ(MatSeqAIJRestoreArray(B,&Ba));
6490   PetscFunctionReturn(0);
6491 }
6492 
6493 /* ----------------------------------------------------------------*/
6494 
6495 /*MC
6496    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6497 
6498    Options Database Keys:
6499 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6500 
6501    Level: beginner
6502 
6503    Notes:
6504     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6505     in this case the values associated with the rows and columns one passes in are set to zero
6506     in the matrix
6507 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6510 
6511 .seealso: MatCreateAIJ()
6512 M*/
6513 
6514 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6515 {
6516   Mat_MPIAIJ     *b;
6517   PetscMPIInt    size;
6518 
6519   PetscFunctionBegin;
6520   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
6521 
6522   CHKERRQ(PetscNewLog(B,&b));
6523   B->data       = (void*)b;
6524   CHKERRQ(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
6525   B->assembled  = PETSC_FALSE;
6526   B->insertmode = NOT_SET_VALUES;
6527   b->size       = size;
6528 
6529   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
6530 
6531   /* build cache for off array entries formed */
6532   CHKERRQ(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
6533 
6534   b->donotstash  = PETSC_FALSE;
6535   b->colmap      = NULL;
6536   b->garray      = NULL;
6537   b->roworiented = PETSC_TRUE;
6538 
6539   /* stuff used for matrix vector multiply */
6540   b->lvec  = NULL;
6541   b->Mvctx = NULL;
6542 
6543   /* stuff for MatGetRow() */
6544   b->rowindices   = NULL;
6545   b->rowvalues    = NULL;
6546   b->getrowactive = PETSC_FALSE;
6547 
6548   /* flexible pointer used in CUSPARSE classes */
6549   b->spptr = NULL;
6550 
6551   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6552   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
6553   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
6554   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
6555   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
6556   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
6557   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
6558   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
6559   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
6560   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
6561 #if defined(PETSC_HAVE_CUDA)
6562   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6563 #endif
6564 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6565   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
6566 #endif
6567 #if defined(PETSC_HAVE_MKL_SPARSE)
6568   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
6569 #endif
6570   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
6571   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
6572   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
6573   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
6574 #if defined(PETSC_HAVE_ELEMENTAL)
6575   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
6576 #endif
6577 #if defined(PETSC_HAVE_SCALAPACK)
6578   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
6579 #endif
6580   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
6581   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
6582 #if defined(PETSC_HAVE_HYPRE)
6583   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
6584   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
6585 #endif
6586   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
6587   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
6588   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
6589   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
6590   CHKERRQ(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
6591   PetscFunctionReturn(0);
6592 }
6593 
6594 /*@C
6595      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6596          and "off-diagonal" part of the matrix in CSR format.
6597 
6598    Collective
6599 
6600    Input Parameters:
6601 +  comm - MPI communicator
6602 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
6606 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6607 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6608 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6609 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6610 .   a - matrix values
6611 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6612 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6613 -   oa - matrix values
6614 
6615    Output Parameter:
6616 .   mat - the matrix
6617 
6618    Level: advanced
6619 
6620    Notes:
6621        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6622        must free the arrays once the matrix has been destroyed and not before.
6623 
6624        The i and j indices are 0 based
6625 
6626        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6627 
6628        This sets local rows and cannot be used to set off-processor values.
6629 
6630        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6631        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6632        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6633        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6634        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6635        communication if it is known that only local entries will be set.
6636 
6637 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6638           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6639 @*/
6640 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6641 {
6642   Mat_MPIAIJ     *maij;
6643 
6644   PetscFunctionBegin;
6645   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6646   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6647   PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6648   CHKERRQ(MatCreate(comm,mat));
6649   CHKERRQ(MatSetSizes(*mat,m,n,M,N));
6650   CHKERRQ(MatSetType(*mat,MATMPIAIJ));
6651   maij = (Mat_MPIAIJ*) (*mat)->data;
6652 
6653   (*mat)->preallocated = PETSC_TRUE;
6654 
6655   CHKERRQ(PetscLayoutSetUp((*mat)->rmap));
6656   CHKERRQ(PetscLayoutSetUp((*mat)->cmap));
6657 
6658   CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6659   CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6660 
6661   CHKERRQ(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6662   CHKERRQ(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6663   CHKERRQ(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6664   CHKERRQ(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6665   CHKERRQ(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6666   PetscFunctionReturn(0);
6667 }
6668 
/* Backend data attached to C->product by MatProductSymbolic_MPIAIJBACKEND:
   holds the intermediate sequential products plus the COO buffers/indices
   used to assemble their values into the final parallel matrix C */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? (temporary ones feed later products and are not copied into C) */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i]; own[i+1]-own[i] is their count */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i]; off[i+1]-off[i] is their count */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the COO buffers (host/CUDA/Kokkos), set from C's matrix type */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A_diag by the merged local B instead of by B_diag and B_off separately */
  PetscBool P_oth_bind; /* bind P_oth to the CPU (option -mat*_backend_pothbind) */
} MatMatMPIAIJBACKEND;
6699 
6700 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6701 {
6702   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6703   PetscInt            i;
6704 
6705   PetscFunctionBegin;
6706   CHKERRQ(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6707   CHKERRQ(PetscFree(mmdata->bufa));
6708   CHKERRQ(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6709   CHKERRQ(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6710   CHKERRQ(MatDestroy(&mmdata->P_oth));
6711   CHKERRQ(MatDestroy(&mmdata->Bloc));
6712   CHKERRQ(PetscSFDestroy(&mmdata->sf));
6713   for (i = 0; i < mmdata->cp; i++) {
6714     CHKERRQ(MatDestroy(&mmdata->mp[i]));
6715   }
6716   CHKERRQ(PetscFree2(mmdata->mp,mmdata->mptmp));
6717   CHKERRQ(PetscFree(mmdata->own[0]));
6718   CHKERRQ(PetscFree(mmdata->own));
6719   CHKERRQ(PetscFree(mmdata->off[0]));
6720   CHKERRQ(PetscFree(mmdata->off));
6721   CHKERRQ(PetscFree(mmdata));
6722   PetscFunctionReturn(0);
6723 }
6724 
6725 /* Copy selected n entries with indices in idx[] of A to v[].
6726    If idx is NULL, copy the whole data array of A to v[]
6727  */
6728 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6729 {
6730   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6731 
6732   PetscFunctionBegin;
6733   CHKERRQ(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6734   if (f) {
6735     CHKERRQ((*f)(A,n,idx,v));
6736   } else {
6737     const PetscScalar *vv;
6738 
6739     CHKERRQ(MatSeqAIJGetArrayRead(A,&vv));
6740     if (n && idx) {
6741       PetscScalar    *w = v;
6742       const PetscInt *oi = idx;
6743       PetscInt       j;
6744 
6745       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6746     } else {
6747       CHKERRQ(PetscArraycpy(v,vv,n));
6748     }
6749     CHKERRQ(MatSeqAIJRestoreArrayRead(A,&vv));
6750   }
6751   PetscFunctionReturn(0);
6752 }
6753 
6754 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6755 {
6756   MatMatMPIAIJBACKEND *mmdata;
6757   PetscInt            i,n_d,n_o;
6758 
6759   PetscFunctionBegin;
6760   MatCheckProduct(C,1);
6761   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6762   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6763   if (!mmdata->reusesym) { /* update temporary matrices */
6764     if (mmdata->P_oth) {
6765       CHKERRQ(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6766     }
6767     if (mmdata->Bloc) {
6768       CHKERRQ(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
6769     }
6770   }
6771   mmdata->reusesym = PETSC_FALSE;
6772 
6773   for (i = 0; i < mmdata->cp; i++) {
6774     PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6775     CHKERRQ((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
6776   }
6777   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6778     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6779 
6780     if (mmdata->mptmp[i]) continue;
6781     if (noff) {
6782       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6783 
6784       CHKERRQ(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
6785       CHKERRQ(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
6786       n_o += noff;
6787       n_d += nown;
6788     } else {
6789       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6790 
6791       CHKERRQ(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
6792       n_d += mm->nz;
6793     }
6794   }
6795   if (mmdata->hasoffproc) { /* offprocess insertion */
6796     CHKERRQ(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6797     CHKERRQ(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
6798   }
6799   CHKERRQ(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
6800   PetscFunctionReturn(0);
6801 }
6802 
6803 /* Support for Pt * A, A * P, or Pt * A * P */
6804 #define MAX_NUMBER_INTERMEDIATE 4
6805 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6806 {
6807   Mat_Product            *product = C->product;
6808   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6809   Mat_MPIAIJ             *a,*p;
6810   MatMatMPIAIJBACKEND    *mmdata;
6811   ISLocalToGlobalMapping P_oth_l2g = NULL;
6812   IS                     glob = NULL;
6813   const char             *prefix;
6814   char                   pprefix[256];
6815   const PetscInt         *globidx,*P_oth_idx;
6816   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6817   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6818   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6819                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6820                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6821   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6822 
6823   MatProductType         ptype;
6824   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6825   PetscMPIInt            size;
6826   PetscErrorCode         ierr;
6827 
6828   PetscFunctionBegin;
6829   MatCheckProduct(C,1);
6830   PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6831   ptype = product->type;
6832   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6833     ptype = MATPRODUCT_AB;
6834     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6835   }
6836   switch (ptype) {
6837   case MATPRODUCT_AB:
6838     A = product->A;
6839     P = product->B;
6840     m = A->rmap->n;
6841     n = P->cmap->n;
6842     M = A->rmap->N;
6843     N = P->cmap->N;
6844     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6845     break;
6846   case MATPRODUCT_AtB:
6847     P = product->A;
6848     A = product->B;
6849     m = P->cmap->n;
6850     n = A->cmap->n;
6851     M = P->cmap->N;
6852     N = A->cmap->N;
6853     hasoffproc = PETSC_TRUE;
6854     break;
6855   case MATPRODUCT_PtAP:
6856     A = product->A;
6857     P = product->B;
6858     m = P->cmap->n;
6859     n = P->cmap->n;
6860     M = P->cmap->N;
6861     N = P->cmap->N;
6862     hasoffproc = PETSC_TRUE;
6863     break;
6864   default:
6865     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6866   }
6867   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6868   if (size == 1) hasoffproc = PETSC_FALSE;
6869 
6870   /* defaults */
6871   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6872     mp[i]    = NULL;
6873     mptmp[i] = PETSC_FALSE;
6874     rmapt[i] = -1;
6875     cmapt[i] = -1;
6876     rmapa[i] = NULL;
6877     cmapa[i] = NULL;
6878   }
6879 
6880   /* customization */
6881   CHKERRQ(PetscNew(&mmdata));
6882   mmdata->reusesym = product->api_user;
6883   if (ptype == MATPRODUCT_AB) {
6884     if (product->api_user) {
6885       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6886       CHKERRQ(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6887       CHKERRQ(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6888       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6889     } else {
6890       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6891       CHKERRQ(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6892       CHKERRQ(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6893       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6894     }
6895   } else if (ptype == MATPRODUCT_PtAP) {
6896     if (product->api_user) {
6897       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6898       CHKERRQ(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6899       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6900     } else {
6901       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6902       CHKERRQ(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6903       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6904     }
6905   }
6906   a = (Mat_MPIAIJ*)A->data;
6907   p = (Mat_MPIAIJ*)P->data;
6908   CHKERRQ(MatSetSizes(C,m,n,M,N));
6909   CHKERRQ(PetscLayoutSetUp(C->rmap));
6910   CHKERRQ(PetscLayoutSetUp(C->cmap));
6911   CHKERRQ(MatSetType(C,((PetscObject)A)->type_name));
6912   CHKERRQ(MatGetOptionsPrefix(C,&prefix));
6913 
6914   cp   = 0;
6915   switch (ptype) {
6916   case MATPRODUCT_AB: /* A * P */
6917     CHKERRQ(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6918 
6919     /* A_diag * P_local (merged or not) */
6920     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6921       /* P is product->B */
6922       CHKERRQ(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6923       CHKERRQ(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
6924       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AB));
6925       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
6926       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6927       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
6928       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
6929       mp[cp]->product->api_user = product->api_user;
6930       CHKERRQ(MatProductSetFromOptions(mp[cp]));
6931       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6932       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
6933       CHKERRQ(ISGetIndices(glob,&globidx));
6934       rmapt[cp] = 1;
6935       cmapt[cp] = 2;
6936       cmapa[cp] = globidx;
6937       mptmp[cp] = PETSC_FALSE;
6938       cp++;
6939     } else { /* A_diag * P_diag and A_diag * P_off */
6940       CHKERRQ(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
6941       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AB));
6942       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
6943       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6944       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
6945       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
6946       mp[cp]->product->api_user = product->api_user;
6947       CHKERRQ(MatProductSetFromOptions(mp[cp]));
6948       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6949       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
6950       rmapt[cp] = 1;
6951       cmapt[cp] = 1;
6952       mptmp[cp] = PETSC_FALSE;
6953       cp++;
6954       CHKERRQ(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
6955       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AB));
6956       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
6957       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6958       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
6959       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
6960       mp[cp]->product->api_user = product->api_user;
6961       CHKERRQ(MatProductSetFromOptions(mp[cp]));
6962       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6963       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
6964       rmapt[cp] = 1;
6965       cmapt[cp] = 2;
6966       cmapa[cp] = p->garray;
6967       mptmp[cp] = PETSC_FALSE;
6968       cp++;
6969     }
6970 
6971     /* A_off * P_other */
6972     if (mmdata->P_oth) {
6973       CHKERRQ(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
6974       CHKERRQ(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
6975       CHKERRQ(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
6976       CHKERRQ(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
6977       CHKERRQ(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
6978       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AB));
6979       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
6980       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6981       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
6982       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
6983       mp[cp]->product->api_user = product->api_user;
6984       CHKERRQ(MatProductSetFromOptions(mp[cp]));
6985       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6986       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
6987       rmapt[cp] = 1;
6988       cmapt[cp] = 2;
6989       cmapa[cp] = P_oth_idx;
6990       mptmp[cp] = PETSC_FALSE;
6991       cp++;
6992     }
6993     break;
6994 
6995   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6996     /* A is product->B */
6997     CHKERRQ(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6998     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6999       CHKERRQ(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7000       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7001       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
7002       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7003       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
7004       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
7005       mp[cp]->product->api_user = product->api_user;
7006       CHKERRQ(MatProductSetFromOptions(mp[cp]));
7007       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7008       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
7009       CHKERRQ(ISGetIndices(glob,&globidx));
7010       rmapt[cp] = 2;
7011       rmapa[cp] = globidx;
7012       cmapt[cp] = 2;
7013       cmapa[cp] = globidx;
7014       mptmp[cp] = PETSC_FALSE;
7015       cp++;
7016     } else {
7017       CHKERRQ(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7018       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7019       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
7020       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7021       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
7022       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
7023       mp[cp]->product->api_user = product->api_user;
7024       CHKERRQ(MatProductSetFromOptions(mp[cp]));
7025       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7026       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
7027       CHKERRQ(ISGetIndices(glob,&globidx));
7028       rmapt[cp] = 1;
7029       cmapt[cp] = 2;
7030       cmapa[cp] = globidx;
7031       mptmp[cp] = PETSC_FALSE;
7032       cp++;
7033       CHKERRQ(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7034       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7035       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
7036       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7037       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
7038       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
7039       mp[cp]->product->api_user = product->api_user;
7040       CHKERRQ(MatProductSetFromOptions(mp[cp]));
7041       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7042       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
7043       rmapt[cp] = 2;
7044       rmapa[cp] = p->garray;
7045       cmapt[cp] = 2;
7046       cmapa[cp] = globidx;
7047       mptmp[cp] = PETSC_FALSE;
7048       cp++;
7049     }
7050     break;
7051   case MATPRODUCT_PtAP:
7052     CHKERRQ(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7053     /* P is product->B */
7054     CHKERRQ(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7055     CHKERRQ(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7056     CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7057     CHKERRQ(MatProductSetFill(mp[cp],product->fill));
7058     CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7059     CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
7060     CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
7061     mp[cp]->product->api_user = product->api_user;
7062     CHKERRQ(MatProductSetFromOptions(mp[cp]));
7063     PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7064     CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
7065     CHKERRQ(ISGetIndices(glob,&globidx));
7066     rmapt[cp] = 2;
7067     rmapa[cp] = globidx;
7068     cmapt[cp] = 2;
7069     cmapa[cp] = globidx;
7070     mptmp[cp] = PETSC_FALSE;
7071     cp++;
7072     if (mmdata->P_oth) {
7073       CHKERRQ(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7074       CHKERRQ(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7075       CHKERRQ(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7076       CHKERRQ(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7077       CHKERRQ(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7078       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AB));
7079       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
7080       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7081       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
7082       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
7083       mp[cp]->product->api_user = product->api_user;
7084       CHKERRQ(MatProductSetFromOptions(mp[cp]));
7085       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7086       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
7087       mptmp[cp] = PETSC_TRUE;
7088       cp++;
7089       CHKERRQ(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7090       CHKERRQ(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7091       CHKERRQ(MatProductSetFill(mp[cp],product->fill));
7092       CHKERRQ(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7093       CHKERRQ(MatSetOptionsPrefix(mp[cp],prefix));
7094       CHKERRQ(MatAppendOptionsPrefix(mp[cp],pprefix));
7095       mp[cp]->product->api_user = product->api_user;
7096       CHKERRQ(MatProductSetFromOptions(mp[cp]));
7097       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7098       CHKERRQ((*mp[cp]->ops->productsymbolic)(mp[cp]));
7099       rmapt[cp] = 2;
7100       rmapa[cp] = globidx;
7101       cmapt[cp] = 2;
7102       cmapa[cp] = P_oth_idx;
7103       mptmp[cp] = PETSC_FALSE;
7104       cp++;
7105     }
7106     break;
7107   default:
7108     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7109   }
7110   /* sanity check */
7111   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7112 
7113   CHKERRQ(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7114   for (i = 0; i < cp; i++) {
7115     mmdata->mp[i]    = mp[i];
7116     mmdata->mptmp[i] = mptmp[i];
7117   }
7118   mmdata->cp = cp;
7119   C->product->data       = mmdata;
7120   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7121   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7122 
7123   /* memory type */
7124   mmdata->mtype = PETSC_MEMTYPE_HOST;
7125   CHKERRQ(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7126   CHKERRQ(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7127   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7128   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7129 
7130   /* prepare coo coordinates for values insertion */
7131 
7132   /* count total nonzeros of those intermediate seqaij Mats
7133     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7134     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7135     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7136   */
7137   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7138     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7139     if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7141       const PetscInt *rmap = rmapa[cp];
7142       const PetscInt mr = mp[cp]->rmap->n;
7143       const PetscInt rs = C->rmap->rstart;
7144       const PetscInt re = C->rmap->rend;
7145       const PetscInt *ii  = mm->i;
7146       for (i = 0; i < mr; i++) {
7147         const PetscInt gr = rmap[i];
7148         const PetscInt nz = ii[i+1] - ii[i];
7149         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7150         else ncoo_oown += nz; /* this row is local */
7151       }
7152     } else ncoo_d += mm->nz;
7153   }
7154 
7155   /*
7156     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7157 
7158     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7159 
    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7161 
7162     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7163     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7164     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7165 
7166     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7168   */
7169   CHKERRQ(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7170   CHKERRQ(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7171 
7172   /* gather (i,j) of nonzeros inserted by remote procs */
7173   if (hasoffproc) {
7174     PetscSF  msf;
7175     PetscInt ncoo2,*coo_i2,*coo_j2;
7176 
7177     CHKERRQ(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7178     CHKERRQ(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7179     CHKERRQ(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7180 
7181     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7182       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7183       PetscInt   *idxoff = mmdata->off[cp];
7184       PetscInt   *idxown = mmdata->own[cp];
7185       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7186         const PetscInt *rmap = rmapa[cp];
7187         const PetscInt *cmap = cmapa[cp];
7188         const PetscInt *ii  = mm->i;
7189         PetscInt       *coi = coo_i + ncoo_o;
7190         PetscInt       *coj = coo_j + ncoo_o;
7191         const PetscInt mr = mp[cp]->rmap->n;
7192         const PetscInt rs = C->rmap->rstart;
7193         const PetscInt re = C->rmap->rend;
7194         const PetscInt cs = C->cmap->rstart;
7195         for (i = 0; i < mr; i++) {
7196           const PetscInt *jj = mm->j + ii[i];
7197           const PetscInt gr  = rmap[i];
7198           const PetscInt nz  = ii[i+1] - ii[i];
7199           if (gr < rs || gr >= re) { /* this is an offproc row */
7200             for (j = ii[i]; j < ii[i+1]; j++) {
7201               *coi++ = gr;
7202               *idxoff++ = j;
7203             }
7204             if (!cmapt[cp]) { /* already global */
7205               for (j = 0; j < nz; j++) *coj++ = jj[j];
7206             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7207               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7208             } else { /* offdiag */
7209               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7210             }
7211             ncoo_o += nz;
7212           } else { /* this is a local row */
7213             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7214           }
7215         }
7216       }
7217       mmdata->off[cp + 1] = idxoff;
7218       mmdata->own[cp + 1] = idxown;
7219     }
7220 
7221     CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7222     CHKERRQ(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7223     CHKERRQ(PetscSFGetMultiSF(mmdata->sf,&msf));
7224     CHKERRQ(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7225     ncoo = ncoo_d + ncoo_oown + ncoo2;
7226     CHKERRQ(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7227     CHKERRQ(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7228     CHKERRQ(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7229     CHKERRQ(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7230     CHKERRQ(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7231     CHKERRQ(PetscFree2(coo_i,coo_j));
7232     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7233     CHKERRQ(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7234     coo_i = coo_i2;
7235     coo_j = coo_j2;
7236   } else { /* no offproc values insertion */
7237     ncoo = ncoo_d;
7238     CHKERRQ(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7239 
7240     CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7241     CHKERRQ(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7242     CHKERRQ(PetscSFSetUp(mmdata->sf));
7243   }
7244   mmdata->hasoffproc = hasoffproc;
7245 
7246   /* gather (i,j) of nonzeros inserted locally */
7247   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7248     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7249     PetscInt       *coi = coo_i + ncoo_d;
7250     PetscInt       *coj = coo_j + ncoo_d;
7251     const PetscInt *jj  = mm->j;
7252     const PetscInt *ii  = mm->i;
7253     const PetscInt *cmap = cmapa[cp];
7254     const PetscInt *rmap = rmapa[cp];
7255     const PetscInt mr = mp[cp]->rmap->n;
7256     const PetscInt rs = C->rmap->rstart;
7257     const PetscInt re = C->rmap->rend;
7258     const PetscInt cs = C->cmap->rstart;
7259 
7260     if (mptmp[cp]) continue;
7261     if (rmapt[cp] == 1) { /* consecutive rows */
7262       /* fill coo_i */
7263       for (i = 0; i < mr; i++) {
7264         const PetscInt gr = i + rs;
7265         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7266       }
7267       /* fill coo_j */
7268       if (!cmapt[cp]) { /* type-0, already global */
7269         CHKERRQ(PetscArraycpy(coj,jj,mm->nz));
7270       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7271         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7272       } else { /* type-2, local to global for sparse columns */
7273         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7274       }
7275       ncoo_d += mm->nz;
7276     } else if (rmapt[cp] == 2) { /* sparse rows */
7277       for (i = 0; i < mr; i++) {
7278         const PetscInt *jj = mm->j + ii[i];
7279         const PetscInt gr  = rmap[i];
7280         const PetscInt nz  = ii[i+1] - ii[i];
7281         if (gr >= rs && gr < re) { /* local rows */
7282           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7283           if (!cmapt[cp]) { /* type-0, already global */
7284             for (j = 0; j < nz; j++) *coj++ = jj[j];
7285           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7286             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7287           } else { /* type-2, local to global for sparse columns */
7288             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7289           }
7290           ncoo_d += nz;
7291         }
7292       }
7293     }
7294   }
7295   if (glob) {
7296     CHKERRQ(ISRestoreIndices(glob,&globidx));
7297   }
7298   CHKERRQ(ISDestroy(&glob));
7299   if (P_oth_l2g) {
7300     CHKERRQ(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7301   }
7302   CHKERRQ(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7303   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7304   CHKERRQ(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7305 
7306   /* preallocate with COO data */
7307   CHKERRQ(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7308   CHKERRQ(PetscFree2(coo_i,coo_j));
7309   PetscFunctionReturn(0);
7310 }
7311 
/*
   MatProductSetFromOptions_MPIAIJBACKEND - Decides whether the device-backend product
   symbolic routine (MatProductSymbolic_MPIAIJBACKEND) can be used for this product.

   The backend path is taken only when:
     - PETSc was configured with device support,
     - neither operand is bound to the CPU and both operands have the same matrix type, and
     - the user did not request a CPU fallback through the per-product options below.
   Otherwise the routine falls back to the plain MPIAIJ product dispatch.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool    match   = PETSC_FALSE;  /* set below: do A and B have the same type? */
  PetscBool    usecpu  = PETSC_FALSE;  /* user-requested CPU fallback */
#else
  /* without device support the backend path is always eligible (it runs on host memory) */
  PetscBool    match   = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* only consider the backend when both operands live on the device and share a type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    CHKERRQ(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    PetscErrorCode ierr;
    /* The option name depends on both the product type and whether the user entered
       through the specific API (MatMatMult etc.) or the generic MatProduct API. */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
        CHKERRQ(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
        CHKERRQ(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
        CHKERRQ(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
        CHKERRQ(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
        CHKERRQ(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
        CHKERRQ(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();CHKERRQ(ierr);
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    /* the backend symbolic routine only supports these three product types */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) CHKERRQ(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7385 
7386 /*
7387     Special version for direct calls from Fortran
7388 */
7389 #include <petsc/private/fortranimpl.h>
7390 
7391 /* Change these macros so can be used in void function */
7392 /* Identical to CHKERRV, except it assigns to *_ierr */
7393 #undef CHKERRQ
7394 #define CHKERRQ(...) do {                                                                      \
7395     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7396     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7397       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7398       return;                                                                                  \
7399     }                                                                                          \
7400   } while (0)
7401 
7402 #undef SETERRQ
7403 #define SETERRQ(comm,ierr,...) do {                                                            \
7404     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7405     return;                                                                                    \
7406   } while (0)
7407 
7408 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7409 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7410 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7411 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7412 #else
7413 #endif
7414 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7415 {
7416   Mat          mat  = *mmat;
7417   PetscInt     m    = *mm, n = *mn;
7418   InsertMode   addv = *maddv;
7419   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7420   PetscScalar  value;
7421 
7422   MatCheckPreallocated(mat,1);
7423   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7424   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7425   {
7426     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7427     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7428     PetscBool roworiented = aij->roworiented;
7429 
7430     /* Some Variables required in the macro */
7431     Mat        A                    = aij->A;
7432     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7433     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7434     MatScalar  *aa;
7435     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7436     Mat        B                    = aij->B;
7437     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7438     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7439     MatScalar  *ba;
7440     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7441      * cannot use "#if defined" inside a macro. */
7442     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7443 
7444     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7445     PetscInt  nonew = a->nonew;
7446     MatScalar *ap1,*ap2;
7447 
7448     PetscFunctionBegin;
7449     CHKERRQ(MatSeqAIJGetArray(A,&aa));
7450     CHKERRQ(MatSeqAIJGetArray(B,&ba));
7451     for (i=0; i<m; i++) {
7452       if (im[i] < 0) continue;
7453       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7454       if (im[i] >= rstart && im[i] < rend) {
7455         row      = im[i] - rstart;
7456         lastcol1 = -1;
7457         rp1      = aj + ai[row];
7458         ap1      = aa + ai[row];
7459         rmax1    = aimax[row];
7460         nrow1    = ailen[row];
7461         low1     = 0;
7462         high1    = nrow1;
7463         lastcol2 = -1;
7464         rp2      = bj + bi[row];
7465         ap2      = ba + bi[row];
7466         rmax2    = bimax[row];
7467         nrow2    = bilen[row];
7468         low2     = 0;
7469         high2    = nrow2;
7470 
7471         for (j=0; j<n; j++) {
7472           if (roworiented) value = v[i*n+j];
7473           else value = v[i+j*m];
7474           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7475           if (in[j] >= cstart && in[j] < cend) {
7476             col = in[j] - cstart;
7477             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7478           } else if (in[j] < 0) continue;
7479           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7480             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7481             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7482           } else {
7483             if (mat->was_assembled) {
7484               if (!aij->colmap) {
7485                 CHKERRQ(MatCreateColmap_MPIAIJ_Private(mat));
7486               }
7487 #if defined(PETSC_USE_CTABLE)
7488               CHKERRQ(PetscTableFind(aij->colmap,in[j]+1,&col));
7489               col--;
7490 #else
7491               col = aij->colmap[in[j]] - 1;
7492 #endif
7493               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7494                 CHKERRQ(MatDisAssemble_MPIAIJ(mat));
7495                 col  =  in[j];
7496                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7497                 B        = aij->B;
7498                 b        = (Mat_SeqAIJ*)B->data;
7499                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7500                 rp2      = bj + bi[row];
7501                 ap2      = ba + bi[row];
7502                 rmax2    = bimax[row];
7503                 nrow2    = bilen[row];
7504                 low2     = 0;
7505                 high2    = nrow2;
7506                 bm       = aij->B->rmap->n;
7507                 ba       = b->a;
7508                 inserted = PETSC_FALSE;
7509               }
7510             } else col = in[j];
7511             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7512           }
7513         }
7514       } else if (!aij->donotstash) {
7515         if (roworiented) {
7516           CHKERRQ(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7517         } else {
7518           CHKERRQ(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7519         }
7520       }
7521     }
7522     CHKERRQ(MatSeqAIJRestoreArray(A,&aa));
7523     CHKERRQ(MatSeqAIJRestoreArray(B,&ba));
7524   }
7525   PetscFunctionReturnVoid();
7526 }
7527 /* Undefining these here since they were redefined from their original definition above! No
7528  * other PETSc functions should be defined past this point, as it is impossible to recover the
7529  * original definitions */
7530 #undef CHKERRQ
7531 #undef SETERRQ
7532