xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 80fdaca06e24329b048de2e274d60da818b72e9c)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ format also automatically
23    switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
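
/*
   A minimal sketch (hypothetical helper, not part of this file; sizes and
   nonzero estimates are illustrative) of the recommendation above: call both
   preallocation routines so the same code runs on any communicator size,
   since the matrix ignores the call that does not match its actual type.
*/
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);        /* takes effect on one process    */
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr); /* takes effect on many processes */
  PetscFunctionReturn(0);
}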
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
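
/*
   Usage sketch (hypothetical caller): MatFindNonzeroRows() leaves *keptrows
   NULL when no row is identically zero on any process (see the early return
   above), so the result must be guarded before use.
*/
static PetscErrorCode ExampleReportKeptRows(Mat M)
{
  PetscErrorCode ierr;
  IS             keptrows;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = MatFindNonzeroRows(M,&keptrows);CHKERRQ(ierr);
  if (keptrows) {
    ierr = ISGetLocalSize(keptrows,&n);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"kept %D local rows\n",n);CHKERRQ(ierr);
    ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}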
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
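
/*
   Usage sketch (hypothetical caller): the norms array must have one entry per
   *global* column, because the routine above accumulates a per-column work
   array and reduces it across the whole communicator.
*/
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
  /* ... use norms[0..N-1] ... */
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}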
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRMPI(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRMPI(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
299       }
300       /* determine the diagonal and off-diagonal nonzero counts */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
333       /* determine the diagonal and off-diagonal nonzero counts */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
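
/*
   Usage sketch (hypothetical caller; assumes gmat is a square SEQAIJ matrix
   on rank 0 of comm and m is this process's local row count):
   MAT_INITIAL_MATRIX builds the distributed matrix, while a later call with
   MAT_REUSE_MATRIX only ships updated numerical values from rank 0.
*/
static PetscErrorCode ExampleDistributeFromRankZero(MPI_Comm comm,Mat gmat,PetscInt m,Mat *dmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,dmat);CHKERRQ(ierr);
  /* ... rank 0 changes the values (but not the pattern) of gmat ... */
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,dmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}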
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 stores an order-N integer array) but is fast to access.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
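
/*
   Lookup sketch (hypothetical helper) mirroring the convention used by
   MatSetValues_MPIAIJ() below: entries are stored shifted by +1, so a lookup
   result of 0 (a table miss, or a zeroed array slot) becomes -1, meaning
   "this global column is not present in the off-diagonal part".
*/
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {
    ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                     /* 0 from the table means "not found", yielding -1 */
#else
  *lcol = aij->colmap[gcol] - 1; /* 0 in the array means "not found", yielding -1 */
#endif
  PetscFunctionReturn(0);
}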
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
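
/*
   Usage sketch (hypothetical caller): a value whose row is owned by another
   process is stashed by the routine above and only communicated during
   assembly, so any sequence of MatSetValues() calls must be closed by the
   MatAssemblyBegin()/MatAssemblyEnd() pair before the matrix is used.
*/
static PetscErrorCode ExampleAddEntryAndAssemble(Mat mat,PetscInt grow,PetscInt gcol,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}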
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
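
/*
   Worked example of the split performed above (illustrative numbers): with
   local column ownership [cstart,cend) = [4,8), a row with global columns
   {1, 5, 9} is split as

     diagonal part:      aj gets {5-4} = {1},      ailen[row] = 1
     off-diagonal part:  bj gets {1, 9} (global),  bilen[row] = 2

   The off-diagonal column indices stay global here; they are compacted to
   local indices against garray later, during assembly.
*/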
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800 
801   PetscFunctionBegin;
802   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
803 
804   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
805   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
806   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
807   PetscFunctionReturn(0);
808 }
809 
810 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
811 {
812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
813   PetscErrorCode ierr;
814   PetscMPIInt    n;
815   PetscInt       i,j,rstart,ncols,flg;
816   PetscInt       *row,*col;
817   PetscBool      other_disassembled;
818   PetscScalar    *val;
819 
820   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
821 
822   PetscFunctionBegin;
823   if (!aij->donotstash && !mat->nooffprocentries) {
824     while (1) {
825       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
826       if (!flg) break;
827 
828       for (i=0; i<n;) {
829         /* Now identify the consecutive values belonging to the same row */
830         for (j=i,rstart=row[j]; j<n; j++) {
831           if (row[j] != rstart) break;
832         }
833         if (j < n) ncols = j-i;
834         else       ncols = n-i;
835         /* Now assemble all these values with a single function call */
836         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
837         i    = j;
838       }
839     }
840     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
841   }
842 #if defined(PETSC_HAVE_DEVICE)
843   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
844   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
845   if (mat->boundtocpu) {
846     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
847     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
848   }
849 #endif
850   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
851   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
852 
853   /* determine if any process has disassembled; if so, we must
854      also disassemble ourselves, in order that we may reassemble. */
855   /*
856      if nonzero structure of submatrix B cannot change then we know that
857      no processor disassembled thus we can skip this stuff
858   */
859   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
860     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
861     if (mat->was_assembled && !other_disassembled) {
862 #if defined(PETSC_HAVE_DEVICE)
863       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
864 #endif
865       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
866     }
867   }
868   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
869     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
870   }
871   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
872 #if defined(PETSC_HAVE_DEVICE)
873   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
874 #endif
875   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
876   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
877 
878   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
879 
880   aij->rowvalues = NULL;
881 
882   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
883 
884   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
885   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
886     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
887     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
888   }
889 #if defined(PETSC_HAVE_DEVICE)
890   mat->offloadmask = PETSC_OFFLOAD_BOTH;
891 #endif
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
896 {
897   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
902   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
909   PetscObjectState sA, sB;
910   PetscInt        *lrows;
911   PetscInt         r, len;
912   PetscBool        cong, lch, gch;
913   PetscErrorCode   ierr;
914 
915   PetscFunctionBegin;
916   /* get locally owned rows */
917   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
918   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
919   /* fix right hand side if needed */
920   if (x && b) {
921     const PetscScalar *xx;
922     PetscScalar       *bb;
923 
924     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
925     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
926     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
927     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
928     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
929     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
930   }
931 
932   sA = mat->A->nonzerostate;
933   sB = mat->B->nonzerostate;
934 
935   if (diag != 0.0 && cong) {
936     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
937     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
938   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
939     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
940     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
941     PetscInt   nnwA, nnwB;
942     PetscBool  nnzA, nnzB;
943 
944     nnwA = aijA->nonew;
945     nnwB = aijB->nonew;
946     nnzA = aijA->keepnonzeropattern;
947     nnzB = aijB->keepnonzeropattern;
948     if (!nnzA) {
949       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
950       aijA->nonew = 0;
951     }
952     if (!nnzB) {
953       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
954       aijB->nonew = 0;
955     }
956     /* Must zero here before the next loop */
957     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
959     for (r = 0; r < len; ++r) {
960       const PetscInt row = lrows[r] + A->rmap->rstart;
961       if (row >= A->cmap->N) continue;
962       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
963     }
964     aijA->nonew = nnwA;
965     aijB->nonew = nnwB;
966   } else {
967     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
968     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
969   }
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973 
974   /* reduce nonzerostate */
975   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
976   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977   if (gch) A->nonzerostate++;
978   PetscFunctionReturn(0);
979 }
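
/*
   Usage sketch (hypothetical caller): impose Dirichlet rows by zeroing them,
   putting 1.0 on the diagonal, and letting the routine above fix the right-
   hand side b from the known values in x (this path requires congruent row
   and column layouts, as checked above).
*/
static PetscErrorCode ExampleZeroDirichletRows(Mat A,PetscInt nrows,const PetscInt rows[],Vec x,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}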
980 
981 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
982 {
983   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode    ierr;
985   PetscMPIInt       n = A->rmap->n;
986   PetscInt          i,j,r,m,len = 0;
987   PetscInt          *lrows,*owners = A->rmap->range;
988   PetscMPIInt       p = 0;
989   PetscSFNode       *rrows;
990   PetscSF           sf;
991   const PetscScalar *xx;
992   PetscScalar       *bb,*mask;
993   Vec               xmask,lmask;
994   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
995   const PetscInt    *aj, *ii,*ridx;
996   PetscScalar       *aa;
997 
998   PetscFunctionBegin;
999   /* Create SF where leaves are input rows and roots are owned rows */
1000   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1001   for (r = 0; r < n; ++r) lrows[r] = -1;
1002   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1003   for (r = 0; r < N; ++r) {
1004     const PetscInt idx   = rows[r];
1005     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1006     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1007       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1008     }
1009     rrows[r].rank  = p;
1010     rrows[r].index = rows[r] - owners[p];
1011   }
1012   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1013   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1014   /* Collect flags for rows to be zeroed */
1015   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1016   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1017   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1018   /* Compress and put in row numbers */
1019   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1022   /* handle off diagonal part of matrix */
1023   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x && b) { /* this code is only correct when the row and column layouts match (checked just below) */
1032     PetscBool cong;
1033 
1034     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1035     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1036     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1039     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1040   }
1041   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1042   /* remove zeroed rows of off diagonal matrix */
1043   ii = aij->i;
1044   for (i=0; i<len; i++) {
1045     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1046   }
1047   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1048   if (aij->compressedrow.use) {
1049     m    = aij->compressedrow.nrows;
1050     ii   = aij->compressedrow.i;
1051     ridx = aij->compressedrow.rindex;
1052     for (i=0; i<m; i++) {
1053       n  = ii[i+1] - ii[i];
1054       aj = aij->j + ii[i];
1055       aa = aij->a + ii[i];
1056 
1057       for (j=0; j<n; j++) {
1058         if (PetscAbsScalar(mask[*aj])) {
1059           if (b) bb[*ridx] -= *aa*xx[*aj];
1060           *aa = 0.0;
1061         }
1062         aa++;
1063         aj++;
1064       }
1065       ridx++;
1066     }
1067   } else { /* do not use compressed row format */
1068     m = l->B->rmap->n;
1069     for (i=0; i<m; i++) {
1070       n  = ii[i+1] - ii[i];
1071       aj = aij->j + ii[i];
1072       aa = aij->a + ii[i];
1073       for (j=0; j<n; j++) {
1074         if (PetscAbsScalar(mask[*aj])) {
1075           if (b) bb[i] -= *aa*xx[*aj];
1076           *aa = 0.0;
1077         }
1078         aa++;
1079         aj++;
1080       }
1081     }
1082   }
1083   if (x && b) {
1084     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1085     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1086   }
1087   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1088   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1089   ierr = PetscFree(lrows);CHKERRQ(ierr);
1090 
1091   /* only change matrix nonzero state if pattern was allowed to be changed */
1092   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1093     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1094     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103   PetscInt       nt;
1104   VecScatter     Mvctx = a->Mvctx;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1109   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
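
/*
   The multiply above overlaps communication with computation:

      y = A_d x_local + A_o x_ghost

   where A_d = a->A is the diagonal block, A_o = a->B the off-diagonal block,
   and x_ghost = a->lvec the ghost values gathered by the scatter. The local
   product A_d x_local is computed between VecScatterBegin() and
   VecScatterEnd(), hiding the communication behind useful work.
*/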
1115 
1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127 {
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129   PetscErrorCode ierr;
1130   VecScatter     Mvctx = a->Mvctx;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1141 {
1142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1143   PetscErrorCode ierr;
1144 
1145   PetscFunctionBegin;
1146   /* do nondiagonal part */
1147   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1148   /* do local part */
1149   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1150   /* add partial results together */
1151   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
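
/*
   The transpose multiply above reverses the pattern of MatMult_MPIAIJ:

      y = A_d^T x + scatter_add(A_o^T x)

   The off-diagonal product lands in the ghost vector a->lvec first, and the
   reverse-mode scatter accumulates those partial sums into the owning
   processes.
*/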
1155 
1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1157 {
1158   MPI_Comm       comm;
1159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1160   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1161   IS             Me,Notme;
1162   PetscErrorCode ierr;
1163   PetscInt       M,N,first,last,*notme,i;
1164   PetscBool      lf;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168   /* Easy test: symmetric diagonal block */
1169   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1170   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1171   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1199 {
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1208 {
1209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213   /* do nondiagonal part */
1214   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1215   /* do local part */
1216   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1217   /* add partial results together */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /*
1224   This only works correctly for square matrices where the subblock A->A is the
1225    diagonal block
1226 */
1227 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1228 {
1229   PetscErrorCode ierr;
1230   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1231 
1232   PetscFunctionBegin;
1233   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1234   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1235   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1240 {
1241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1242   PetscErrorCode ierr;
1243 
1244   PetscFunctionBegin;
1245   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1246   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1251 {
1252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256 #if defined(PETSC_USE_LOG)
1257   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1258 #endif
1259   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1263 #if defined(PETSC_USE_CTABLE)
1264   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1265 #else
1266   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1267 #endif
1268   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1269   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1270   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1271   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1272   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1273   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1274 
1275   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1276   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1277 
1278   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1288 #if defined(PETSC_HAVE_CUDA)
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1290 #endif
1291 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1293 #endif
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1310 #if defined(PETSC_HAVE_MKL_SPARSE)
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1312 #endif
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1320 {
1321   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1322   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1323   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1324   const PetscInt    *garray = aij->garray;
1325   const PetscScalar *aa,*ba;
1326   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1327   PetscInt          *rowlens;
1328   PetscInt          *colidxs;
1329   PetscScalar       *matvals;
1330   PetscErrorCode    ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1334 
1335   M  = mat->rmap->N;
1336   N  = mat->cmap->N;
1337   m  = mat->rmap->n;
1338   rs = mat->rmap->rstart;
1339   cs = mat->cmap->rstart;
1340   nz = A->nz + B->nz;
1341 
1342   /* write matrix header */
1343   header[0] = MAT_FILE_CLASSID;
1344   header[1] = M; header[2] = N; header[3] = nz;
1345   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1346   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1347 
1348   /* fill in and store row lengths  */
1349   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1350   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1351   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1352   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1353 
1354   /* fill in and store column indices */
1355   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1356   for (cnt=0, i=0; i<m; i++) {
1357     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1358       if (garray[B->j[jb]] > cs) break;
1359       colidxs[cnt++] = garray[B->j[jb]];
1360     }
1361     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1362       colidxs[cnt++] = A->j[ja] + cs;
1363     for (; jb<B->i[i+1]; jb++)
1364       colidxs[cnt++] = garray[B->j[jb]];
1365   }
1366   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1367   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1368   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1369 
1370   /* fill in and store nonzero values */
1371   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1372   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1373   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1374   for (cnt=0, i=0; i<m; i++) {
1375     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1376       if (garray[B->j[jb]] > cs) break;
1377       matvals[cnt++] = ba[jb];
1378     }
1379     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1380       matvals[cnt++] = aa[ja];
1381     for (; jb<B->i[i+1]; jb++)
1382       matvals[cnt++] = ba[jb];
1383   }
1384   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1385   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1386   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1387   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1388   ierr = PetscFree(matvals);CHKERRQ(ierr);
1389 
1390   /* write block size option to the viewer's .info file */
1391   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
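
/*
   The binary format written above is: a 4-entry header {MAT_FILE_CLASSID, M, N, global nz},
   the local row lengths, the global column indices (increasing within each row, which is why
   the off-diagonal entries left of the diagonal block are interleaved first), and finally the
   values. A matching save/load sketch (hypothetical caller):

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/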
1394 
1395 #include <petscdraw.h>
1396 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1397 {
1398   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1399   PetscErrorCode    ierr;
1400   PetscMPIInt       rank = aij->rank,size = aij->size;
1401   PetscBool         isdraw,iascii,isbinary;
1402   PetscViewer       sviewer;
1403   PetscViewerFormat format;
1404 
1405   PetscFunctionBegin;
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1409   if (iascii) {
1410     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1411     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1412       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1413       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1414       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1415       for (i=0; i<(PetscInt)size; i++) {
1416         nmax = PetscMax(nmax,nz[i]);
1417         nmin = PetscMin(nmin,nz[i]);
1418         navg += nz[i];
1419       }
1420       ierr = PetscFree(nz);CHKERRQ(ierr);
1421       navg = navg/size;
1422       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1423       PetscFunctionReturn(0);
1424     }
1425     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1426     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1427       MatInfo   info;
1428       PetscBool inodes;
1429 
1430       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1431       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1432       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1434       if (!inodes) {
1435         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1436                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1437       } else {
1438         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1439                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1440       }
1441       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1442       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1443       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1444       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1445       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1446       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1447       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1448       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1449       PetscFunctionReturn(0);
1450     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1451       PetscInt inodecount,inodelimit,*inodes;
1452       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1453       if (inodes) {
1454         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1455       } else {
1456         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1457       }
1458       PetscFunctionReturn(0);
1459     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1460       PetscFunctionReturn(0);
1461     }
1462   } else if (isbinary) {
1463     if (size == 1) {
1464       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1465       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1466     } else {
1467       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1468     }
1469     PetscFunctionReturn(0);
1470   } else if (iascii && size == 1) {
1471     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1472     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1473     PetscFunctionReturn(0);
1474   } else if (isdraw) {
1475     PetscDraw draw;
1476     PetscBool isnull;
1477     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1478     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1479     if (isnull) PetscFunctionReturn(0);
1480   }
1481 
1482   { /* assemble the entire matrix onto the first process */
1483     Mat A = NULL, Av;
1484     IS  isrow,iscol;
1485 
1486     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1487     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1488     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1489     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1490 /*  The commented code uses MatCreateSubMatrices instead */
1491 /*
1492     Mat *AA, A = NULL, Av;
1493     IS  isrow,iscol;
1494 
1495     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1496     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1497     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1498     if (!rank) {
1499        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1500        A    = AA[0];
1501        Av   = AA[0];
1502     }
1503     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1504 */
1505     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1506     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1507     /*
1508        Every process has to participate in drawing the matrix since the graphics
1509        waits are synchronized across all processes that share the PetscDraw object
1510     */
1511     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1512     if (!rank) {
1513       if (((PetscObject)mat)->name) {
1514         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1515       }
1516       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1517     }
1518     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1519     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1520     ierr = MatDestroy(&A);CHKERRQ(ierr);
1521   }
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1526 {
1527   PetscErrorCode ierr;
1528   PetscBool      iascii,isdraw,issocket,isbinary;
1529 
1530   PetscFunctionBegin;
1531   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1532   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1533   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1534   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1535   if (iascii || isdraw || isbinary || issocket) {
1536     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1537   }
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1542 {
1543   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1544   PetscErrorCode ierr;
1545   Vec            bb1 = NULL;
1546   PetscBool      hasop;
1547 
1548   PetscFunctionBegin;
1549   if (flag == SOR_APPLY_UPPER) {
1550     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1551     PetscFunctionReturn(0);
1552   }
1553 
1554   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1555     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1556   }
1557 
1558   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1559     if (flag & SOR_ZERO_INITIAL_GUESS) {
1560       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1561       its--;
1562     }
1563 
1564     while (its--) {
1565       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567 
1568       /* update rhs: bb1 = bb - B*x */
1569       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1570       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1571 
1572       /* local sweep */
1573       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1574     }
1575   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1576     if (flag & SOR_ZERO_INITIAL_GUESS) {
1577       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1578       its--;
1579     }
1580     while (its--) {
1581       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583 
1584       /* update rhs: bb1 = bb - B*x */
1585       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1586       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1587 
1588       /* local sweep */
1589       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1590     }
1591   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1592     if (flag & SOR_ZERO_INITIAL_GUESS) {
1593       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1594       its--;
1595     }
1596     while (its--) {
1597       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1599 
1600       /* update rhs: bb1 = bb - B*x */
1601       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1602       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1603 
1604       /* local sweep */
1605       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1606     }
1607   } else if (flag & SOR_EISENSTAT) {
1608     Vec xx1;
1609 
1610     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1611     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1612 
1613     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1615     if (!mat->diag) {
1616       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1617       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1618     }
1619     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1620     if (hasop) {
1621       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1622     } else {
1623       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1624     }
1625     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1626 
1627     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1628 
1629     /* local sweep */
1630     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1631     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1632     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1633   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1634 
1635   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1636 
1637   matin->factorerrortype = mat->A->factorerrortype;
1638   PetscFunctionReturn(0);
1639 }
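
/*
   A minimal usage sketch (hypothetical caller): one "local" symmetric SOR sweep, in which each
   process applies SOR to its diagonal block after moving the off-process part of x to the
   right-hand side, as implemented above:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   This is the kernel PCSOR applies; note the routine errors out for true parallel (non-local) sweeps.
*/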
1640 
1641 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1642 {
1643   Mat            aA,aB,Aperm;
1644   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1645   PetscScalar    *aa,*ba;
1646   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1647   PetscSF        rowsf,sf;
1648   IS             parcolp = NULL;
1649   PetscBool      done;
1650   PetscErrorCode ierr;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1654   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1655   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1656   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1657 
1658   /* Invert row permutation to find out where my rows should go */
1659   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1660   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1661   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1663   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1665 
1666   /* Invert column permutation to find out where my columns should go */
1667   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1668   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1669   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1670   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1671   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1672   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1676   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1677   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1678 
1679   /* Find out where my gcols should go */
1680   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1681   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1682   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1683   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1684   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1685   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1686   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1687   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1688 
1689   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1690   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1691   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1692   for (i=0; i<m; i++) {
1693     PetscInt    row = rdest[i];
1694     PetscMPIInt rowner;
1695     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1696     for (j=ai[i]; j<ai[i+1]; j++) {
1697       PetscInt    col = cdest[aj[j]];
1698       PetscMPIInt cowner;
1699       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1700       if (rowner == cowner) dnnz[i]++;
1701       else onnz[i]++;
1702     }
1703     for (j=bi[i]; j<bi[i+1]; j++) {
1704       PetscInt    col = gcdest[bj[j]];
1705       PetscMPIInt cowner;
1706       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1707       if (rowner == cowner) dnnz[i]++;
1708       else onnz[i]++;
1709     }
1710   }
1711   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1712   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1713   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1714   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1715   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1716 
1717   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1718   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1719   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1720   for (i=0; i<m; i++) {
1721     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1722     PetscInt j0,rowlen;
1723     rowlen = ai[i+1] - ai[i];
1724     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches of at most m (acols has room for only m entries) */
1725       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1726       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1727     }
1728     rowlen = bi[i+1] - bi[i];
1729     for (j0=j=0; j<rowlen; j0=j) {
1730       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1731       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1732     }
1733   }
1734   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1735   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1736   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1737   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1738   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1739   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1740   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1741   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1742   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1743   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1744   *B = Aperm;
1745   PetscFunctionReturn(0);
1746 }
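
/*
   A minimal usage sketch (hypothetical caller): permute the rows of A while keeping the column
   order; each process supplies the permutation entries for its own rows/columns. The array
   perm[] below is user-filled (hypothetical) with the desired global row indices:

     IS       rowp,colp;
     Mat      B;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,rend-rstart,perm,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&colp);CHKERRQ(ierr);   identity on columns
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/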
1747 
1748 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1749 {
1750   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1755   if (ghosts) *ghosts = aij->garray;
1756   PetscFunctionReturn(0);
1757 }
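
/*
   A minimal usage sketch (hypothetical caller): the "ghosts" are exactly garray, i.e. the
   global column indices represented by the compressed columns of the off-diagonal block B:

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
     ghosts[k] is the global column of local column k of B; the array is borrowed, do not free it
*/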
1758 
1759 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1760 {
1761   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1762   Mat            A    = mat->A,B = mat->B;
1763   PetscErrorCode ierr;
1764   PetscLogDouble isend[5],irecv[5];
1765 
1766   PetscFunctionBegin;
1767   info->block_size = 1.0;
1768   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1769 
1770   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1771   isend[3] = info->memory;  isend[4] = info->mallocs;
1772 
1773   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1774 
1775   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1776   isend[3] += info->memory;  isend[4] += info->mallocs;
1777   if (flag == MAT_LOCAL) {
1778     info->nz_used      = isend[0];
1779     info->nz_allocated = isend[1];
1780     info->nz_unneeded  = isend[2];
1781     info->memory       = isend[3];
1782     info->mallocs      = isend[4];
1783   } else if (flag == MAT_GLOBAL_MAX) {
1784     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1785 
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   } else if (flag == MAT_GLOBAL_SUM) {
1792     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1793 
1794     info->nz_used      = irecv[0];
1795     info->nz_allocated = irecv[1];
1796     info->nz_unneeded  = irecv[2];
1797     info->memory       = irecv[3];
1798     info->mallocs      = irecv[4];
1799   }
1800   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1801   info->fill_ratio_needed = 0;
1802   info->factor_mallocs    = 0;
1803   PetscFunctionReturn(0);
1804 }
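
/*
   A minimal usage sketch (hypothetical caller) showing the three reduction modes handled above:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);        this process only
     ierr = MatGetInfo(A,MAT_GLOBAL_MAX,&info);CHKERRQ(ierr);   max over all processes
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);   sum over all processes
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/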
1805 
1806 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1807 {
1808   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1809   PetscErrorCode ierr;
1810 
1811   PetscFunctionBegin;
1812   switch (op) {
1813   case MAT_NEW_NONZERO_LOCATIONS:
1814   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1815   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1816   case MAT_KEEP_NONZERO_PATTERN:
1817   case MAT_NEW_NONZERO_LOCATION_ERR:
1818   case MAT_USE_INODES:
1819   case MAT_IGNORE_ZERO_ENTRIES:
1820     MatCheckPreallocated(A,1);
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_ROW_ORIENTED:
1825     MatCheckPreallocated(A,1);
1826     a->roworiented = flg;
1827 
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1830     break;
1831   case MAT_FORCE_DIAGONAL_ENTRIES:
1832   case MAT_SORTED_FULL:
1833     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1834     break;
1835   case MAT_IGNORE_OFF_PROC_ENTRIES:
1836     a->donotstash = flg;
1837     break;
1838   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1839   case MAT_SPD:
1840   case MAT_SYMMETRIC:
1841   case MAT_STRUCTURALLY_SYMMETRIC:
1842   case MAT_HERMITIAN:
1843   case MAT_SYMMETRY_ETERNAL:
1844     break;
1845   case MAT_SUBMAT_SINGLEIS:
1846     A->submat_singleis = flg;
1847     break;
1848   case MAT_STRUCTURE_ONLY:
1849     /* The option is handled directly by MatSetOption() */
1850     break;
1851   default:
1852     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1858 {
1859   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1860   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1861   PetscErrorCode ierr;
1862   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1863   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1864   PetscInt       *cmap,*idx_p;
1865 
1866   PetscFunctionBegin;
1867   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1868   mat->getrowactive = PETSC_TRUE;
1869 
1870   if (!mat->rowvalues && (idx || v)) {
1871     /*
1872         allocate enough space to hold information from the longest row.
1873     */
1874     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1875     PetscInt   max = 1,tmp;
1876     for (i=0; i<matin->rmap->n; i++) {
1877       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1878       if (max < tmp) max = tmp;
1879     }
1880     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1881   }
1882 
1883   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1884   lrow = row - rstart;
1885 
1886   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1887   if (!v)   {pvA = NULL; pvB = NULL;}
1888   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1889   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1890   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1891   nztot = nzA + nzB;
1892 
1893   cmap = mat->garray;
1894   if (v  || idx) {
1895     if (nztot) {
1896       /* Sort by increasing column numbers, assuming A and B already sorted */
1897       PetscInt imark = -1;
1898       if (v) {
1899         *v = v_p = mat->rowvalues;
1900         for (i=0; i<nzB; i++) {
1901           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1902           else break;
1903         }
1904         imark = i;
1905         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1906         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1907       }
1908       if (idx) {
1909         *idx = idx_p = mat->rowindices;
1910         if (imark > -1) {
1911           for (i=0; i<imark; i++) {
1912             idx_p[i] = cmap[cworkB[i]];
1913           }
1914         } else {
1915           for (i=0; i<nzB; i++) {
1916             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1917             else break;
1918           }
1919           imark = i;
1920         }
1921         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1922         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1923       }
1924     } else {
1925       if (idx) *idx = NULL;
1926       if (v)   *v   = NULL;
1927     }
1928   }
1929   *nz  = nztot;
1930   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1931   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1936 {
1937   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1938 
1939   PetscFunctionBegin;
1940   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1941   aij->getrowactive = PETSC_FALSE;
1942   PetscFunctionReturn(0);
1943 }
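
/*
   A minimal usage sketch (hypothetical caller): MatGetRow() may only be asked for locally
   owned rows, and each MatGetRow() must be paired with MatRestoreRow() before the next call:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       cols[] holds global indices in increasing order, merged from the two blocks as above
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/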
1944 
1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1946 {
1947   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1948   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1949   PetscErrorCode ierr;
1950   PetscInt       i,j,cstart = mat->cmap->rstart;
1951   PetscReal      sum = 0.0;
1952   MatScalar      *v;
1953 
1954   PetscFunctionBegin;
1955   if (aij->size == 1) {
1956     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1957   } else {
1958     if (type == NORM_FROBENIUS) {
1959       v = amat->a;
1960       for (i=0; i<amat->nz; i++) {
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962       }
1963       v = bmat->a;
1964       for (i=0; i<bmat->nz; i++) {
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966       }
1967       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1968       *norm = PetscSqrtReal(*norm);
1969       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1970     } else if (type == NORM_1) { /* max column norm */
1971       PetscReal *tmp,*tmp2;
1972       PetscInt  *jj,*garray = aij->garray;
1973       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1974       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1975       *norm = 0.0;
1976       v     = amat->a; jj = amat->j;
1977       for (j=0; j<amat->nz; j++) {
1978         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1979       }
1980       v = bmat->a; jj = bmat->j;
1981       for (j=0; j<bmat->nz; j++) {
1982         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1983       }
1984       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1985       for (j=0; j<mat->cmap->N; j++) {
1986         if (tmp2[j] > *norm) *norm = tmp2[j];
1987       }
1988       ierr = PetscFree(tmp);CHKERRQ(ierr);
1989       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1990       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1991     } else if (type == NORM_INFINITY) { /* max row norm */
1992       PetscReal ntemp = 0.0;
1993       for (j=0; j<aij->A->rmap->n; j++) {
1994         v   = amat->a + amat->i[j];
1995         sum = 0.0;
1996         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1997           sum += PetscAbsScalar(*v); v++;
1998         }
1999         v = bmat->a + bmat->i[j];
2000         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         if (sum > ntemp) ntemp = sum;
2004       }
2005       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2006       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2007     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
2008   }
2009   PetscFunctionReturn(0);
2010 }
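
/*
   A minimal usage sketch (hypothetical caller) of the three norms supported above:

     PetscReal n1,nf,ninf;
     ierr = MatNorm(A,NORM_1,&n1);CHKERRQ(ierr);           max column sum of |a_ij|
     ierr = MatNorm(A,NORM_FROBENIUS,&nf);CHKERRQ(ierr);   sqrt of the sum of |a_ij|^2
     ierr = MatNorm(A,NORM_INFINITY,&ninf);CHKERRQ(ierr);  max row sum of |a_ij|

   NORM_2 is not supported for this type, as the error branch above indicates.
*/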
2011 
2012 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2013 {
2014   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2015   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2016   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2017   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2018   PetscErrorCode  ierr;
2019   Mat             B,A_diag,*B_diag;
2020   const MatScalar *array;
2021 
2022   PetscFunctionBegin;
2023   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2024   ai = Aloc->i; aj = Aloc->j;
2025   bi = Bloc->i; bj = Bloc->j;
2026   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2027     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2028     PetscSFNode          *oloc;
2029     PETSC_UNUSED PetscSF sf;
2030 
2031     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2032     /* compute d_nnz for preallocation */
2033     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2034     for (i=0; i<ai[ma]; i++) {
2035       d_nnz[aj[i]]++;
2036     }
2037     /* compute local off-diagonal contributions */
2038     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2039     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2040     /* map those to global */
2041     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2042     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2043     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2044     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2045     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2046     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2047     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2048 
2049     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2050     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2051     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2052     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2053     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2054     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2055   } else {
2056     B    = *matout;
2057     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2058   }
2059 
2060   b           = (Mat_MPIAIJ*)B->data;
2061   A_diag      = a->A;
2062   B_diag      = &b->A;
2063   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2064   A_diag_ncol = A_diag->cmap->N;
2065   B_diag_ilen = sub_B_diag->ilen;
2066   B_diag_i    = sub_B_diag->i;
2067 
2068   /* Set ilen for diagonal of B */
2069   for (i=0; i<A_diag_ncol; i++) {
2070     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2071   }
2072 
2073   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2074      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2075   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2076 
2077   /* copy over the B part */
2078   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2079   array = Bloc->a;
2080   row   = A->rmap->rstart;
2081   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2082   cols_tmp = cols;
2083   for (i=0; i<mb; i++) {
2084     ncol = bi[i+1]-bi[i];
2085     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2086     row++;
2087     array += ncol; cols_tmp += ncol;
2088   }
2089   ierr = PetscFree(cols);CHKERRQ(ierr);
2090 
2091   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2093   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2094     *matout = B;
2095   } else {
2096     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2097   }
2098   PetscFunctionReturn(0);
2099 }
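
/*
   A minimal usage sketch (hypothetical caller) of the reuse modes dispatched above:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   create A^T
     (change numerical values of A without changing its nonzero pattern)
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);     refill the existing A^T
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);    replace A by A^T
*/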
2100 
2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2102 {
2103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2104   Mat            a    = aij->A,b = aij->B;
2105   PetscErrorCode ierr;
2106   PetscInt       s1,s2,s3;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2110   if (rr) {
2111     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2112     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2113     /* Overlap communication with computation. */
2114     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115   }
2116   if (ll) {
2117     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2118     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2119     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2120   }
2121   /* scale  the diagonal block */
2122   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2123 
2124   if (rr) {
2125     /* Do a scatter end and then right scale the off-diagonal block */
2126     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2127     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
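
/*
   A minimal usage sketch (hypothetical caller): A <- diag(l) A diag(r); either vector may be
   NULL to scale only one side. Note how the routine above overlaps the scatter of r with the
   left-scaling of the off-diagonal block:

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);   r: column layout, l: row layout
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/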
2131 
2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2143 {
2144   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2145   Mat            a,b,c,d;
2146   PetscBool      flg;
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   a = matA->A; b = matA->B;
2151   c = matB->A; d = matB->B;
2152 
2153   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2154   if (flg) {
2155     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2156   }
2157   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2162 {
2163   PetscErrorCode ierr;
2164   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2165   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2166 
2167   PetscFunctionBegin;
2168   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2169   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2170     /* because of the column compression in the off-processor part of the matrix a->B,
2171        the number of columns in a->B and b->B may be different, hence we cannot call
2172        the MatCopy() directly on the two parts. If need be, we can provide a more
2173        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2174        then copying the submatrices */
2175     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2176   } else {
2177     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2178     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2179   }
2180   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2185 {
2186   PetscErrorCode ierr;
2187 
2188   PetscFunctionBegin;
2189   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /*
2194    Computes the number of nonzeros per row needed for preallocation when X and Y
2195    have different nonzero structure.
2196 */
2197 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2198 {
2199   PetscInt       i,j,k,nzx,nzy;
2200 
2201   PetscFunctionBegin;
2202   /* Set the number of nonzeros in the new matrix */
2203   for (i=0; i<m; i++) {
2204     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2205     nzx = xi[i+1] - xi[i];
2206     nzy = yi[i+1] - yi[i];
2207     nnz[i] = 0;
2208     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2209       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2210       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2211       nnz[i]++;
2212     }
2213     for (; k<nzy; k++) nnz[i]++;
2214   }
2215   PetscFunctionReturn(0);
2216 }
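
/*
   Worked example of the merge count above (illustrative values): suppose in row i the matrix X
   has global columns {0,3,7} and Y has global columns {3,5}. Then:
     j=0 (X col 0): no smaller Y columns to catch up on; count col 0  -> nnz[i] = 1
     j=1 (X col 3): Y col 3 is a duplicate, skip it; count col 3      -> nnz[i] = 2
     j=2 (X col 7): catch up past Y col 5 (count it); count col 7     -> nnz[i] = 4
     no trailing Y columns remain                                     -> nnz[i] = 4 = |{0,3,5,7}|
*/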
2217 
2218 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2219 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2220 {
2221   PetscErrorCode ierr;
2222   PetscInt       m = Y->rmap->N;
2223   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2224   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2225 
2226   PetscFunctionBegin;
2227   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2232 {
2233   PetscErrorCode ierr;
2234   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2235   PetscBLASInt   bnz,one=1;
2236   Mat_SeqAIJ     *x,*y;
2237 
2238   PetscFunctionBegin;
2239   if (str == SAME_NONZERO_PATTERN) {
2240     PetscScalar alpha = a;
2241     x    = (Mat_SeqAIJ*)xx->A->data;
2242     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2243     y    = (Mat_SeqAIJ*)yy->A->data;
2244     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2245     x    = (Mat_SeqAIJ*)xx->B->data;
2246     y    = (Mat_SeqAIJ*)yy->B->data;
2247     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2248     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2249     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2250     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so there the matrix on the GPU
2251        is updated automatically; this fast path does not, so flag the CPU copy as the current one */
2252 #if defined(PETSC_HAVE_DEVICE)
2253     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2254       Y->offloadmask = PETSC_OFFLOAD_CPU;
2255     }
2256 #endif
2257   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2258     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2259   } else {
2260     Mat      B;
2261     PetscInt *nnz_d,*nnz_o;
2262     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2263     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2264     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2265     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2266     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2267     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2268     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2269     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2270     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2271     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2272     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2273     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2274     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2275   }
2276   PetscFunctionReturn(0);
2277 }
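
/*
   A minimal usage sketch (hypothetical caller): Y <- Y + a*X, where the MatStructure argument
   selects among the three branches above:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        fastest: two BLAS axpy calls
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      X's pattern contained in Y's
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   Y rebuilt on the union pattern
*/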
2278 
2279 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2280 
2281 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2282 {
2283 #if defined(PETSC_USE_COMPLEX)
2284   PetscErrorCode ierr;
2285   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2289   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2290 #else
2291   PetscFunctionBegin;
2292 #endif
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2297 {
2298   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2303   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2308 {
2309   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2314   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2319 {
2320   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2321   PetscErrorCode    ierr;
2322   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2323   PetscScalar       *va,*vv;
2324   Vec               vB,vA;
2325   const PetscScalar *vb;
2326 
2327   PetscFunctionBegin;
2328   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2329   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2330 
2331   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2332   if (idx) {
2333     for (i=0; i<m; i++) {
2334       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2335     }
2336   }
2337 
2338   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2339   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2340   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2341 
2342   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2343   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2344   for (i=0; i<m; i++) {
2345     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2346       vv[i] = vb[i];
2347       if (idx) idx[i] = a->garray[idxb[i]];
2348     } else {
2349       vv[i] = va[i];
2350       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2351         idx[i] = a->garray[idxb[i]];
2352     }
2353   }
2354   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2355   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2356   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2357   ierr = PetscFree(idxb);CHKERRQ(ierr);
2358   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2359   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
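
/*
   A minimal usage sketch (hypothetical caller): v[i] receives the entry of largest absolute
   value in local row i, and idx[i] (optional) its global column, merged from both blocks above:

     Vec      v;
     PetscInt m,*idx;
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);   v has the row layout of A
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/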
2362 
2363 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2364 {
2365   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2366   PetscInt       m = A->rmap->n,n = A->cmap->n;
2367   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2368   PetscInt       *cmap  = mat->garray;
2369   PetscInt       *diagIdx, *offdiagIdx;
2370   Vec            diagV, offdiagV;
2371   PetscScalar    *a, *diagA, *offdiagA, *ba;
2372   PetscInt       r,j,col,ncols,*bi,*bj;
2373   PetscErrorCode ierr;
2374   Mat            B = mat->B;
2375   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2376 
2377   PetscFunctionBegin;
2378   /* When one process holds the entire matrix A and the other processes have no entries */
2379   if (A->cmap->N == n) {
2380     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2381     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2382     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2383     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2384     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2385     PetscFunctionReturn(0);
2386   } else if (n == 0) {
2387     if (m) {
2388       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2389       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2390       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2391     }
2392     PetscFunctionReturn(0);
2393   }
2394 
2395   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2397   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2398   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2399 
2400   /* Get offdiagIdx[] for implicit 0.0 */
2401   ba = b->a;
2402   bi = b->i;
2403   bj = b->j;
2404   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2405   for (r = 0; r < m; r++) {
2406     ncols = bi[r+1] - bi[r];
2407     if (ncols == A->cmap->N - n) { /* Brow is dense */
2408       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2409     } else { /* Brow is sparse, so it contains an implicit 0.0 and the minimum absolute value is 0.0 */
2410       offdiagA[r] = 0.0;
2411 
2412       /* Find first hole in the cmap */
2413       for (j=0; j<ncols; j++) {
2414         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2415         if (col > j && j < cstart) {
2416           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2417           break;
2418         } else if (col > j + n && j >= cstart) {
2419           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2420           break;
2421         }
2422       }
2423       if (j == ncols && ncols < A->cmap->N - n) {
2424         /* a hole is outside compressed Bcols */
2425         if (ncols == 0) {
2426           if (cstart) {
2427             offdiagIdx[r] = 0;
2428           } else offdiagIdx[r] = cend;
2429         } else { /* ncols > 0 */
2430           offdiagIdx[r] = cmap[ncols-1] + 1;
2431           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2432         }
2433       }
2434     }
2435 
2436     for (j=0; j<ncols; j++) {
2437       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2438       ba++; bj++;
2439     }
2440   }
2441 
2442   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2443   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2444   for (r = 0; r < m; ++r) {
2445     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2446       a[r]   = diagA[r];
2447       if (idx) idx[r] = cstart + diagIdx[r];
2448     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2449       a[r] = diagA[r];
2450       if (idx) {
2451         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2452           idx[r] = cstart + diagIdx[r];
2453         } else idx[r] = offdiagIdx[r];
2454       }
2455     } else {
2456       a[r]   = offdiagA[r];
2457       if (idx) idx[r] = offdiagIdx[r];
2458     }
2459   }
2460   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2470 {
2471   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2472   PetscInt       m = A->rmap->n,n = A->cmap->n;
2473   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2474   PetscInt       *cmap  = mat->garray;
2475   PetscInt       *diagIdx, *offdiagIdx;
2476   Vec            diagV, offdiagV;
2477   PetscScalar    *a, *diagA, *offdiagA, *ba;
2478   PetscInt       r,j,col,ncols,*bi,*bj;
2479   PetscErrorCode ierr;
2480   Mat            B = mat->B;
2481   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2482 
2483   PetscFunctionBegin;
2484   /* When one process holds the entire matrix A and the other processes have no entries */
2485   if (A->cmap->N == n) {
2486     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2487     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2488     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2489     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2490     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2491     PetscFunctionReturn(0);
2492   } else if (n == 0) {
2493     if (m) {
2494       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2495       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2496       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2497     }
2498     PetscFunctionReturn(0);
2499   }
2500 
2501   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2503   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2505 
2506   /* Get offdiagIdx[] for implicit 0.0 */
2507   ba = b->a;
2508   bi = b->i;
2509   bj = b->j;
2510   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2511   for (r = 0; r < m; r++) {
2512     ncols = bi[r+1] - bi[r];
2513     if (ncols == A->cmap->N - n) { /* Brow is dense */
2514       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2515     } else { /* Brow is sparse, so it contains an implicit 0.0 and the minimum is 0.0 or lower */
2516       offdiagA[r] = 0.0;
2517 
2518       /* Find first hole in the cmap */
2519       for (j=0; j<ncols; j++) {
2520         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2521         if (col > j && j < cstart) {
2522           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2523           break;
2524         } else if (col > j + n && j >= cstart) {
2525           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2526           break;
2527         }
2528       }
2529       if (j == ncols && ncols < A->cmap->N - n) {
2530         /* a hole is outside compressed Bcols */
2531         if (ncols == 0) {
2532           if (cstart) {
2533             offdiagIdx[r] = 0;
2534           } else offdiagIdx[r] = cend;
2535         } else { /* ncols > 0 */
2536           offdiagIdx[r] = cmap[ncols-1] + 1;
2537           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2538         }
2539       }
2540     }
2541 
2542     for (j=0; j<ncols; j++) {
2543       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2544       ba++; bj++;
2545     }
2546   }
2547 
2548   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2549   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2550   for (r = 0; r < m; ++r) {
2551     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2552       a[r]   = diagA[r];
2553       if (idx) idx[r] = cstart + diagIdx[r];
2554     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2555       a[r] = diagA[r];
2556       if (idx) {
2557         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2558           idx[r] = cstart + diagIdx[r];
2559         } else idx[r] = offdiagIdx[r];
2560       }
2561     } else {
2562       a[r]   = offdiagA[r];
2563       if (idx) idx[r] = offdiagIdx[r];
2564     }
2565   }
2566   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2567   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2568   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2569   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2570   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2571   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
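
/*
   Usage sketch (illustrative only; `rowmin' and `idx' are placeholder names):
   MatGetRowMin() fills a vector conforming to the row layout of A, and the
   optional idx[] receives the global column index of each row minimum.

     Vec      rowmin;
     PetscInt *idx,m;

     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&rowmin);CHKERRQ(ierr); // left vector: row layout of A
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMin(A,rowmin,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmin);CHKERRQ(ierr);

   The same pattern applies to MatGetRowMax() below.
*/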
2574 
2575 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2576 {
2577   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2578   PetscInt       m = A->rmap->n,n = A->cmap->n;
2579   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2580   PetscInt       *cmap  = mat->garray;
2581   PetscInt       *diagIdx, *offdiagIdx;
2582   Vec            diagV, offdiagV;
2583   PetscScalar    *a, *diagA, *offdiagA, *ba;
2584   PetscInt       r,j,col,ncols,*bi,*bj;
2585   PetscErrorCode ierr;
2586   Mat            B = mat->B;
2587   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2588 
2589   PetscFunctionBegin;
2590   /* When a single process holds the entire A and the other processes have no entries */
2591   if (A->cmap->N == n) {
2592     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2593     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2594     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2595     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2596     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2597     PetscFunctionReturn(0);
2598   } else if (n == 0) {
2599     if (m) {
2600       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2601       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2602       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2603     }
2604     PetscFunctionReturn(0);
2605   }
2606 
2607   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2608   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2609   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2610   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2611 
2612   /* Get offdiagIdx[] for implicit 0.0 */
2613   ba = b->a;
2614   bi = b->i;
2615   bj = b->j;
2616   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2617   for (r = 0; r < m; r++) {
2618     ncols = bi[r+1] - bi[r];
2619     if (ncols == A->cmap->N - n) { /* Brow is dense */
2620       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2621     } else { /* Brow is sparse so we already KNOW the maximum is 0.0 or higher */
2622       offdiagA[r] = 0.0;
2623 
2624       /* Find first hole in the cmap */
2625       for (j=0; j<ncols; j++) {
2626         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2627         if (col > j && j < cstart) {
2628           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2629           break;
2630         } else if (col > j + n && j >= cstart) {
2631           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2632           break;
2633         }
2634       }
2635       if (j == ncols && ncols < A->cmap->N - n) {
2636         /* a hole is outside compressed Bcols */
2637         if (ncols == 0) {
2638           if (cstart) {
2639             offdiagIdx[r] = 0;
2640           } else offdiagIdx[r] = cend;
2641         } else { /* ncols > 0 */
2642           offdiagIdx[r] = cmap[ncols-1] + 1;
2643           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2644         }
2645       }
2646     }
2647 
2648     for (j=0; j<ncols; j++) {
2649       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2650       ba++; bj++;
2651     }
2652   }
2653 
2654   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2655   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2656   for (r = 0; r < m; ++r) {
2657     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2658       a[r] = diagA[r];
2659       if (idx) idx[r] = cstart + diagIdx[r];
2660     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2661       a[r] = diagA[r];
2662       if (idx) {
2663         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2664           idx[r] = cstart + diagIdx[r];
2665         } else idx[r] = offdiagIdx[r];
2666       }
2667     } else {
2668       a[r] = offdiagA[r];
2669       if (idx) idx[r] = offdiagIdx[r];
2670     }
2671   }
2672   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2673   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2674   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2675   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2676   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2677   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2682 {
2683   PetscErrorCode ierr;
2684   Mat            *dummy;
2685 
2686   PetscFunctionBegin;
2687   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2688   *newmat = *dummy;
2689   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2690   PetscFunctionReturn(0);
2691 }
2692 
2693 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2694 {
2695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2696   PetscErrorCode ierr;
2697 
2698   PetscFunctionBegin;
2699   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2700   A->factorerrortype = a->A->factorerrortype;
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2705 {
2706   PetscErrorCode ierr;
2707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2708 
2709   PetscFunctionBegin;
2710   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2711   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2712   if (x->assembled) {
2713     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2714   } else {
2715     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2716   }
2717   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2718   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
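
/*
   Usage sketch (illustrative only): filling a preallocated or assembled AIJ matrix
   with random entries through the public entry point MatSetRandom().

     PetscRandom rctx;

     ierr = PetscRandomCreate(PetscObjectComm((PetscObject)A),&rctx);CHKERRQ(ierr);
     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
     ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/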
2721 
2722 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2723 {
2724   PetscFunctionBegin;
2725   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2726   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 /*@
2731    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2732 
2733    Collective on Mat
2734 
2735    Input Parameters:
2736 +    A - the matrix
2737 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2738 
2739    Level: advanced
2740 
2741 @*/
2742 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2743 {
2744   PetscErrorCode       ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
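
/*
   Usage sketch (illustrative only): the scalable overlap algorithm may be selected
   programmatically,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or from the options database with -mat_increase_overlap_scalable, handled by
   MatSetFromOptions_MPIAIJ() below.
*/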
2750 
2751 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2752 {
2753   PetscErrorCode       ierr;
2754   PetscBool            sc = PETSC_FALSE,flg;
2755 
2756   PetscFunctionBegin;
2757   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2758   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2759   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2760   if (flg) {
2761     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2762   }
2763   ierr = PetscOptionsTail();CHKERRQ(ierr);
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2768 {
2769   PetscErrorCode ierr;
2770   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2771   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2772 
2773   PetscFunctionBegin;
2774   if (!Y->preallocated) {
2775     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2776   } else if (!aij->nz) {
2777     PetscInt nonew = aij->nonew;
2778     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2779     aij->nonew = nonew;
2780   }
2781   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2786 {
2787   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2792   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2793   if (d) {
2794     PetscInt rstart;
2795     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2796     *d += rstart;
2797 
2798   }
2799   PetscFunctionReturn(0);
2800 }
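
/*
   Usage sketch (illustrative only): querying a square matrix for a missing diagonal
   entry; when missing is PETSC_TRUE, d holds the global row of the first one found.

     PetscBool missing;
     PetscInt  d;

     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
*/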
2801 
2802 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2803 {
2804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2805   PetscErrorCode ierr;
2806 
2807   PetscFunctionBegin;
2808   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 /* -------------------------------------------------------------------*/
2813 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2814                                        MatGetRow_MPIAIJ,
2815                                        MatRestoreRow_MPIAIJ,
2816                                        MatMult_MPIAIJ,
2817                                 /* 4*/ MatMultAdd_MPIAIJ,
2818                                        MatMultTranspose_MPIAIJ,
2819                                        MatMultTransposeAdd_MPIAIJ,
2820                                        NULL,
2821                                        NULL,
2822                                        NULL,
2823                                 /*10*/ NULL,
2824                                        NULL,
2825                                        NULL,
2826                                        MatSOR_MPIAIJ,
2827                                        MatTranspose_MPIAIJ,
2828                                 /*15*/ MatGetInfo_MPIAIJ,
2829                                        MatEqual_MPIAIJ,
2830                                        MatGetDiagonal_MPIAIJ,
2831                                        MatDiagonalScale_MPIAIJ,
2832                                        MatNorm_MPIAIJ,
2833                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2834                                        MatAssemblyEnd_MPIAIJ,
2835                                        MatSetOption_MPIAIJ,
2836                                        MatZeroEntries_MPIAIJ,
2837                                 /*24*/ MatZeroRows_MPIAIJ,
2838                                        NULL,
2839                                        NULL,
2840                                        NULL,
2841                                        NULL,
2842                                 /*29*/ MatSetUp_MPIAIJ,
2843                                        NULL,
2844                                        NULL,
2845                                        MatGetDiagonalBlock_MPIAIJ,
2846                                        NULL,
2847                                 /*34*/ MatDuplicate_MPIAIJ,
2848                                        NULL,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                 /*39*/ MatAXPY_MPIAIJ,
2853                                        MatCreateSubMatrices_MPIAIJ,
2854                                        MatIncreaseOverlap_MPIAIJ,
2855                                        MatGetValues_MPIAIJ,
2856                                        MatCopy_MPIAIJ,
2857                                 /*44*/ MatGetRowMax_MPIAIJ,
2858                                        MatScale_MPIAIJ,
2859                                        MatShift_MPIAIJ,
2860                                        MatDiagonalSet_MPIAIJ,
2861                                        MatZeroRowsColumns_MPIAIJ,
2862                                 /*49*/ MatSetRandom_MPIAIJ,
2863                                        NULL,
2864                                        NULL,
2865                                        NULL,
2866                                        NULL,
2867                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2868                                        NULL,
2869                                        MatSetUnfactored_MPIAIJ,
2870                                        MatPermute_MPIAIJ,
2871                                        NULL,
2872                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2873                                        MatDestroy_MPIAIJ,
2874                                        MatView_MPIAIJ,
2875                                        NULL,
2876                                        NULL,
2877                                 /*64*/ NULL,
2878                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2879                                        NULL,
2880                                        NULL,
2881                                        NULL,
2882                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2883                                        MatGetRowMinAbs_MPIAIJ,
2884                                        NULL,
2885                                        NULL,
2886                                        NULL,
2887                                        NULL,
2888                                 /*75*/ MatFDColoringApply_AIJ,
2889                                        MatSetFromOptions_MPIAIJ,
2890                                        NULL,
2891                                        NULL,
2892                                        MatFindZeroDiagonals_MPIAIJ,
2893                                 /*80*/ NULL,
2894                                        NULL,
2895                                        NULL,
2896                                 /*83*/ MatLoad_MPIAIJ,
2897                                        MatIsSymmetric_MPIAIJ,
2898                                        NULL,
2899                                        NULL,
2900                                        NULL,
2901                                        NULL,
2902                                 /*89*/ NULL,
2903                                        NULL,
2904                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2905                                        NULL,
2906                                        NULL,
2907                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2908                                        NULL,
2909                                        NULL,
2910                                        NULL,
2911                                        MatBindToCPU_MPIAIJ,
2912                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2913                                        NULL,
2914                                        NULL,
2915                                        MatConjugate_MPIAIJ,
2916                                        NULL,
2917                                 /*104*/MatSetValuesRow_MPIAIJ,
2918                                        MatRealPart_MPIAIJ,
2919                                        MatImaginaryPart_MPIAIJ,
2920                                        NULL,
2921                                        NULL,
2922                                 /*109*/NULL,
2923                                        NULL,
2924                                        MatGetRowMin_MPIAIJ,
2925                                        NULL,
2926                                        MatMissingDiagonal_MPIAIJ,
2927                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2928                                        NULL,
2929                                        MatGetGhosts_MPIAIJ,
2930                                        NULL,
2931                                        NULL,
2932                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2933                                        NULL,
2934                                        NULL,
2935                                        NULL,
2936                                        MatGetMultiProcBlock_MPIAIJ,
2937                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2938                                        MatGetColumnNorms_MPIAIJ,
2939                                        MatInvertBlockDiagonal_MPIAIJ,
2940                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2941                                        MatCreateSubMatricesMPI_MPIAIJ,
2942                                 /*129*/NULL,
2943                                        NULL,
2944                                        NULL,
2945                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2946                                        NULL,
2947                                 /*134*/NULL,
2948                                        NULL,
2949                                        NULL,
2950                                        NULL,
2951                                        NULL,
2952                                 /*139*/MatSetBlockSizes_MPIAIJ,
2953                                        NULL,
2954                                        NULL,
2955                                        MatFDColoringSetUp_MPIXAIJ,
2956                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2957                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2958                                 /*145*/NULL,
2959                                        NULL,
2960                                        NULL
2961 };
2962 
2963 /* ----------------------------------------------------------------------------------------*/
2964 
2965 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2966 {
2967   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2968   PetscErrorCode ierr;
2969 
2970   PetscFunctionBegin;
2971   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2972   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2977 {
2978   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2983   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2984   PetscFunctionReturn(0);
2985 }
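
/*
   Usage sketch (illustrative only): MatStoreValues()/MatRetrieveValues() cache and
   restore the numerical values of a matrix whose nonzero structure is frozen; the
   matrix must be assembled and new nonzero locations must be disabled first.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     // ... overwrite the values of A, for example with MatZeroEntries(A) ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/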
2986 
2987 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2988 {
2989   Mat_MPIAIJ     *b;
2990   PetscErrorCode ierr;
2991   PetscMPIInt    size;
2992 
2993   PetscFunctionBegin;
2994   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2995   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2996   b = (Mat_MPIAIJ*)B->data;
2997 
2998 #if defined(PETSC_USE_CTABLE)
2999   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3000 #else
3001   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3002 #endif
3003   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3004   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3005   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3006 
3007   /* Because B may have been resized we simply destroy it and create a new one each time */
3008   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3009   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3010   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3011   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
3012   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3013   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3014   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3015 
3016   if (!B->preallocated) {
3017     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3018     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3019     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3020     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3021     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3022   }
3023 
3024   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3025   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3026   B->preallocated  = PETSC_TRUE;
3027   B->was_assembled = PETSC_FALSE;
3028   B->assembled     = PETSC_FALSE;
3029   PetscFunctionReturn(0);
3030 }
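
/*
   Usage sketch (illustrative only; M and N are placeholder global sizes): typical
   preallocation of an MPIAIJ matrix with at most 5 nonzeros per row in the diagonal
   block and 2 in the off-diagonal block; exact per-row counts may be passed through
   the d_nnz/o_nnz arrays instead of NULL.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/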
3031 
3032 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3033 {
3034   Mat_MPIAIJ     *b;
3035   PetscErrorCode ierr;
3036 
3037   PetscFunctionBegin;
3038   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3039   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3040   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3041   b = (Mat_MPIAIJ*)B->data;
3042 
3043 #if defined(PETSC_USE_CTABLE)
3044   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3045 #else
3046   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3047 #endif
3048   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3049   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3050   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3051 
3052   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3053   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3054   B->preallocated  = PETSC_TRUE;
3055   B->was_assembled = PETSC_FALSE;
3056   B->assembled = PETSC_FALSE;
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3061 {
3062   Mat            mat;
3063   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3064   PetscErrorCode ierr;
3065 
3066   PetscFunctionBegin;
3067   *newmat = NULL;
3068   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3069   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3070   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3071   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3072   a       = (Mat_MPIAIJ*)mat->data;
3073 
3074   mat->factortype   = matin->factortype;
3075   mat->assembled    = matin->assembled;
3076   mat->insertmode   = NOT_SET_VALUES;
3077   mat->preallocated = matin->preallocated;
3078 
3079   a->size         = oldmat->size;
3080   a->rank         = oldmat->rank;
3081   a->donotstash   = oldmat->donotstash;
3082   a->roworiented  = oldmat->roworiented;
3083   a->rowindices   = NULL;
3084   a->rowvalues    = NULL;
3085   a->getrowactive = PETSC_FALSE;
3086 
3087   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3088   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3089 
3090   if (oldmat->colmap) {
3091 #if defined(PETSC_USE_CTABLE)
3092     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3093 #else
3094     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3095     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3096     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3097 #endif
3098   } else a->colmap = NULL;
3099   if (oldmat->garray) {
3100     PetscInt len;
3101     len  = oldmat->B->cmap->n;
3102     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3103     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3104     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3105   } else a->garray = NULL;
3106 
3107   /* MatDuplicate() may be called with a non-assembled matrix;
3108      in fact, MatDuplicate() only requires the matrix to be preallocated.
3109      This may happen, for example, inside DMCreateMatrix_Shell() */
3110   if (oldmat->lvec) {
3111     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3112     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3113   }
3114   if (oldmat->Mvctx) {
3115     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3116     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3117   }
3118   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3119   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3120   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3121   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3122   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3123   *newmat = mat;
3124   PetscFunctionReturn(0);
3125 }
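
/*
   Usage sketch (illustrative only): duplicating a matrix; MAT_COPY_VALUES also copies
   the numerical values, while MAT_DO_NOT_COPY_VALUES duplicates the structure only.

     Mat Adup;

     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Adup);CHKERRQ(ierr);
     ierr = MatDestroy(&Adup);CHKERRQ(ierr);
*/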
3126 
3127 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3128 {
3129   PetscBool      isbinary, ishdf5;
3130   PetscErrorCode ierr;
3131 
3132   PetscFunctionBegin;
3133   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3134   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3135   /* force binary viewer to load .info file if it has not yet done so */
3136   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3137   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3138   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3139   if (isbinary) {
3140     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3141   } else if (ishdf5) {
3142 #if defined(PETSC_HAVE_HDF5)
3143     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3144 #else
3145     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3146 #endif
3147   } else {
3148     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3149   }
3150   PetscFunctionReturn(0);
3151 }
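
/*
   Usage sketch (illustrative only; "matrix.dat" is a placeholder file name): loading
   an MPIAIJ matrix from a PETSc binary file.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/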
3152 
3153 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3154 {
3155   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3156   PetscInt       *rowidxs,*colidxs;
3157   PetscScalar    *matvals;
3158   PetscErrorCode ierr;
3159 
3160   PetscFunctionBegin;
3161   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3162 
3163   /* read in matrix header */
3164   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3165   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3166   M  = header[1]; N = header[2]; nz = header[3];
3167   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3168   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3169   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3170 
3171   /* set block sizes from the viewer's .info file */
3172   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3173   /* set global sizes if not set already */
3174   if (mat->rmap->N < 0) mat->rmap->N = M;
3175   if (mat->cmap->N < 0) mat->cmap->N = N;
3176   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3177   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3178 
3179   /* check if the matrix sizes are correct */
3180   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3181   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3182 
3183   /* read in row lengths and build row indices */
3184   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3185   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3186   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
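  /* convert the per-row lengths just read into CSR row offsets; e.g. lengths {2,0,3} become offsets {0,2,2,5} */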
3187   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3188   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3189   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
3190   /* read in column indices and matrix values */
3191   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3192   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3193   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3194   /* store matrix indices and values */
3195   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3196   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3197   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 /* Not scalable because of ISAllGather() unless getting all columns. */
3202 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3203 {
3204   PetscErrorCode ierr;
3205   IS             iscol_local;
3206   PetscBool      isstride;
3207   PetscMPIInt    lisstride=0,gisstride;
3208 
3209   PetscFunctionBegin;
3210   /* Check if we are grabbing all columns */
3211   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3212 
3213   if (isstride) {
3214     PetscInt  start,len,mstart,mlen;
3215     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3216     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3217     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3218     if (mstart == start && mlen-mstart == len) lisstride = 1;
3219   }
3220 
3221   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3222   if (gisstride) {
3223     PetscInt N;
3224     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3225     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3226     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3227     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3228   } else {
3229     PetscInt cbs;
3230     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3231     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3232     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3233   }
3234 
3235   *isseq = iscol_local;
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 /*
3240  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3241  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3242 
3243  Input Parameters:
3244    mat - matrix
3245    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3246            i.e., mat->rstart <= isrow[i] < mat->rend
3247    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3248            i.e., mat->cstart <= iscol[i] < mat->cend
3249  Output Parameters:
3250    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3251    iscol_o - sequential column index set for retrieving mat->B
3252    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3253  */
3254 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3255 {
3256   PetscErrorCode ierr;
3257   Vec            x,cmap;
3258   const PetscInt *is_idx;
3259   PetscScalar    *xarray,*cmaparray;
3260   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3261   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3262   Mat            B=a->B;
3263   Vec            lvec=a->lvec,lcmap;
3264   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3265   MPI_Comm       comm;
3266   VecScatter     Mvctx=a->Mvctx;
3267 
3268   PetscFunctionBegin;
3269   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3270   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3271 
3272   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3273   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3274   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3275   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3276   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3277 
3278   /* Get start indices */
3279   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3280   isstart -= ncols;
3281   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3282 
3283   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3284   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3285   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3286   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3287   for (i=0; i<ncols; i++) {
3288     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3289     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3290     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3291   }
3292   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3293   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3294   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3295 
3296   /* Get iscol_d */
3297   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3298   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3299   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3300 
3301   /* Get isrow_d */
3302   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3303   rstart = mat->rmap->rstart;
3304   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3305   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3306   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3307   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3308 
3309   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3310   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3311   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3312 
3313   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3314   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3315   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3316 
3317   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3318 
3319   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3320   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3321 
3322   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3323   /* off-process column indices */
3324   count = 0;
3325   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3326   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3327 
3328   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3329   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3330   for (i=0; i<Bn; i++) {
3331     if (PetscRealPart(xarray[i]) > -1.0) {
3332       idx[count]     = i;                   /* local column index in off-diagonal part B */
3333       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3334       count++;
3335     }
3336   }
3337   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3338   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3339 
3340   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3341   /* cannot ensure iscol_o has same blocksize as iscol! */
3342 
3343   ierr = PetscFree(idx);CHKERRQ(ierr);
3344   *garray = cmap1;
3345 
3346   ierr = VecDestroy(&x);CHKERRQ(ierr);
3347   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3348   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3353 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3354 {
3355   PetscErrorCode ierr;
3356   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3357   Mat            M = NULL;
3358   MPI_Comm       comm;
3359   IS             iscol_d,isrow_d,iscol_o;
3360   Mat            Asub = NULL,Bsub = NULL;
3361   PetscInt       n;
3362 
3363   PetscFunctionBegin;
3364   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3365 
3366   if (call == MAT_REUSE_MATRIX) {
3367     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3368     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3369     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3370 
3371     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3372     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3373 
3374     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3375     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3376 
3377     /* Update diagonal and off-diagonal portions of submat */
3378     asub = (Mat_MPIAIJ*)(*submat)->data;
3379     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3380     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3381     if (n) {
3382       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3383     }
3384     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3385     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3386 
3387   } else { /* call == MAT_INITIAL_MATRIX */
3388     const PetscInt *garray;
3389     PetscInt        BsubN;
3390 
3391     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3392     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3393 
3394     /* Create local submatrices Asub and Bsub */
3395     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3396     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3397 
3398     /* Create submatrix M */
3399     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3400 
3401     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3402     asub = (Mat_MPIAIJ*)M->data;
3403 
3404     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3405     n = asub->B->cmap->N;
3406     if (BsubN > n) {
3407       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3408       const PetscInt *idx;
3409       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3410       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3411 
3412       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3413       j = 0;
3414       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3415       for (i=0; i<n; i++) {
3416         if (j >= BsubN) break;
3417         while (subgarray[i] > garray[j]) j++;
3418 
3419         if (subgarray[i] == garray[j]) {
3420           idx_new[i] = idx[j++];
3421       } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3422       }
3423       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3424 
3425       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3426       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3427 
3428     } else if (BsubN < n) {
3429       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3430     }
3431 
3432     ierr = PetscFree(garray);CHKERRQ(ierr);
3433     *submat = M;
3434 
3435     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3436     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3437     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3438 
3439     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3440     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3441 
3442     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3443     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3444   }
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3449 {
3450   PetscErrorCode ierr;
3451   IS             iscol_local=NULL,isrow_d;
3452   PetscInt       csize;
3453   PetscInt       n,i,j,start,end;
3454   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3455   MPI_Comm       comm;
3456 
3457   PetscFunctionBegin;
3458   /* If isrow has same processor distribution as mat,
3459      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table of the global size of iscol */
3460   if (call == MAT_REUSE_MATRIX) {
3461     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3462     if (isrow_d) {
3463       sameRowDist  = PETSC_TRUE;
3464       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3465     } else {
3466       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3467       if (iscol_local) {
3468         sameRowDist  = PETSC_TRUE;
3469         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3470       }
3471     }
3472   } else {
3473     /* Check if isrow has same processor distribution as mat */
3474     sameDist[0] = PETSC_FALSE;
3475     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3476     if (!n) {
3477       sameDist[0] = PETSC_TRUE;
3478     } else {
3479       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3480       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3481       if (i >= start && j < end) {
3482         sameDist[0] = PETSC_TRUE;
3483       }
3484     }
3485 
3486     /* Check if iscol has same processor distribution as mat */
3487     sameDist[1] = PETSC_FALSE;
3488     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3489     if (!n) {
3490       sameDist[1] = PETSC_TRUE;
3491     } else {
3492       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3493       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3494       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3495     }
3496 
3497     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3498     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3499     sameRowDist = tsameDist[0];
3500   }
3501 
3502   if (sameRowDist) {
3503     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3504       /* isrow and iscol have same processor distribution as mat */
3505       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3506       PetscFunctionReturn(0);
3507     } else { /* sameRowDist */
3508       /* isrow has same processor distribution as mat */
3509       if (call == MAT_INITIAL_MATRIX) {
3510         PetscBool sorted;
3511         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3512         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3513         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3514         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3515 
3516         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3517         if (sorted) {
3518           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3519           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3520           PetscFunctionReturn(0);
3521         }
3522       } else { /* call == MAT_REUSE_MATRIX */
3523         IS iscol_sub;
3524         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3525         if (iscol_sub) {
3526           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3527           PetscFunctionReturn(0);
3528         }
3529       }
3530     }
3531   }
3532 
3533   /* General case: iscol -> iscol_local which has global size of iscol */
3534   if (call == MAT_REUSE_MATRIX) {
3535     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3536     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3537   } else {
3538     if (!iscol_local) {
3539       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3540     }
3541   }
3542 
3543   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3544   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3545 
3546   if (call == MAT_INITIAL_MATRIX) {
3547     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3548     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3549   }
3550   PetscFunctionReturn(0);
3551 }
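
/*
   Usage sketch (illustrative only): extracting a parallel submatrix; isrow/iscol hold
   the global rows and columns wanted by this process. A later call with
   MAT_REUSE_MATRIX and the same index sets refills the existing submatrix.

     Mat submat;

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&submat);CHKERRQ(ierr);
     ierr = MatDestroy(&submat);CHKERRQ(ierr);
*/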
3552 
3553 /*@C
3554      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3555          and "off-diagonal" part of the matrix in CSR format.
3556 
3557    Collective
3558 
3559    Input Parameters:
3560 +  comm - MPI communicator
3561 .  A - "diagonal" portion of matrix
3562 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3563 -  garray - global index of B columns
3564 
3565    Output Parameter:
3566 .   mat - the matrix, with input A as its local diagonal matrix

3567    Level: advanced
3568 
3569    Notes:
3570        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3571        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
3572 
3573 .seealso: MatCreateMPIAIJWithSplitArrays()
3574 @*/
3575 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3576 {
3577   PetscErrorCode ierr;
3578   Mat_MPIAIJ     *maij;
3579   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3580   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3581   PetscScalar    *oa=b->a;
3582   Mat            Bnew;
3583   PetscInt       m,n,N;
3584 
3585   PetscFunctionBegin;
3586   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3587   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3588   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3589   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3590   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3591   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3592 
3593   /* Get global columns of mat */
3594   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3595 
3596   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3597   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3598   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3599   maij = (Mat_MPIAIJ*)(*mat)->data;
3600 
3601   (*mat)->preallocated = PETSC_TRUE;
3602 
3603   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3604   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3605 
3606   /* Set A as diagonal portion of *mat */
3607   maij->A = A;
3608 
3609   nz = oi[m];
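  /* translate B's compressed local column indices into global ones; e.g. (assumed values) garray = {3,5} maps local columns {0,1} to global columns {3,5} */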
3610   for (i=0; i<nz; i++) {
3611     col   = oj[i];
3612     oj[i] = garray[col];
3613   }
3614 
3615   /* Set Bnew as off-diagonal portion of *mat */
3616   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3617   bnew        = (Mat_SeqAIJ*)Bnew->data;
3618   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3619   maij->B     = Bnew;
3620 
3621   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3622 
3623   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3624   b->free_a       = PETSC_FALSE;
3625   b->free_ij      = PETSC_FALSE;
3626   ierr = MatDestroy(&B);CHKERRQ(ierr);
3627 
3628   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3629   bnew->free_a       = PETSC_TRUE;
3630   bnew->free_ij      = PETSC_TRUE;
3631 
3632   /* condense columns of maij->B */
3633   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3634   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3637   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3642 
3643 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3644 {
3645   PetscErrorCode ierr;
3646   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3647   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3648   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3649   Mat            M,Msub,B=a->B;
3650   MatScalar      *aa;
3651   Mat_SeqAIJ     *aij;
3652   PetscInt       *garray = a->garray,*colsub,Ncols;
3653   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3654   IS             iscol_sub,iscmap;
3655   const PetscInt *is_idx,*cmap;
3656   PetscBool      allcolumns=PETSC_FALSE;
3657   MPI_Comm       comm;
3658 
3659   PetscFunctionBegin;
3660   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3661   if (call == MAT_REUSE_MATRIX) {
3662     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3663     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3664     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3665 
3666     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3667     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3668 
3669     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3670     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3671 
3672     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3673 
3674   } else { /* call == MAT_INITIAL_MATRIX */
3675     PetscBool flg;
3676 
3677     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3678     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3679 
3680     /* (1) iscol -> nonscalable iscol_local */
3681     /* Check for special case: each processor gets entire matrix columns */
3682     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3683     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3684     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3685     if (allcolumns) {
3686       iscol_sub = iscol_local;
3687       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3688       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3689 
3690     } else {
3691       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3692       PetscInt *idx,*cmap1,k;
3693       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3694       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3695       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3696       count = 0;
3697       k     = 0;
3698       for (i=0; i<Ncols; i++) {
3699         j = is_idx[i];
3700         if (j >= cstart && j < cend) {
3701           /* diagonal part of mat */
3702           idx[count]     = j;
3703           cmap1[count++] = i; /* column index in submat */
3704         } else if (Bn) {
3705           /* off-diagonal part of mat */
3706           if (j == garray[k]) {
3707             idx[count]     = j;
3708             cmap1[count++] = i;  /* column index in submat */
3709           } else if (j > garray[k]) {
3710             while (j > garray[k] && k < Bn-1) k++;
3711             if (j == garray[k]) {
3712               idx[count]     = j;
3713               cmap1[count++] = i; /* column index in submat */
3714             }
3715           }
3716         }
3717       }
3718       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3719 
3720       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3721       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3722       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3723 
3724       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3725     }
3726 
3727     /* (3) Create sequential Msub */
3728     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3729   }
3730 
3731   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3732   aij  = (Mat_SeqAIJ*)(Msub)->data;
3733   ii   = aij->i;
3734   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3735 
3736   /*
3737       m - number of local rows
3738       Ncols - number of columns (same on all processors)
3739       rstart - first row in new global matrix generated
3740   */
3741   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3742 
3743   if (call == MAT_INITIAL_MATRIX) {
3744     /* (4) Create parallel newmat */
3745     PetscMPIInt    rank,size;
3746     PetscInt       csize;
3747 
3748     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3749     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3750 
3751     /*
3752         Determine the number of non-zeros in the diagonal and off-diagonal
3753         portions of the matrix in order to do correct preallocation
3754     */
3755 
3756     /* first get start and end of "diagonal" columns */
3757     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3758     if (csize == PETSC_DECIDE) {
3759       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3760       if (mglobal == Ncols) { /* square matrix */
3761         nlocal = m;
3762       } else {
3763         nlocal = Ncols/size + ((Ncols % size) > rank);
3764       }
3765     } else {
3766       nlocal = csize;
3767     }
3768     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3769     rstart = rend - nlocal;
3770     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3771 
3772     /* next, compute all the lengths */
3773     jj    = aij->j;
3774     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3775     olens = dlens + m;
3776     for (i=0; i<m; i++) {
3777       jend = ii[i+1] - ii[i];
3778       olen = 0;
3779       dlen = 0;
3780       for (j=0; j<jend; j++) {
3781         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3782         else dlen++;
3783         jj++;
3784       }
3785       olens[i] = olen;
3786       dlens[i] = dlen;
3787     }
3788 
3789     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3790     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3791 
3792     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3793     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3794     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3795     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3796     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3797     ierr = PetscFree(dlens);CHKERRQ(ierr);
3798 
3799   } else { /* call == MAT_REUSE_MATRIX */
3800     M    = *newmat;
3801     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3802     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3803     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3804     /*
3805          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3806        rather than the slower MatSetValues().
3807     */
3808     M->was_assembled = PETSC_TRUE;
3809     M->assembled     = PETSC_FALSE;
3810   }
3811 
3812   /* (5) Set values of Msub to *newmat */
3813   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3814   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3815 
3816   jj   = aij->j;
3817   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3818   for (i=0; i<m; i++) {
3819     row = rstart + i;
3820     nz  = ii[i+1] - ii[i];
3821     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3822     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3823     jj += nz; aa += nz;
3824   }
3825   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3826   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3827 
3828   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830 
3831   ierr = PetscFree(colsub);CHKERRQ(ierr);
3832 
3833   /* save Msub, iscol_sub and iscmap on this process for a future MAT_REUSE_MATRIX request */
3834   if (call == MAT_INITIAL_MATRIX) {
3835     *newmat = M;
3836     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3837     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3838 
3839     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3840     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3841 
3842     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3843     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3844 
3845     if (iscol_local) {
3846       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3847       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3848     }
3849   }
3850   PetscFunctionReturn(0);
3851 }
3852 
3853 /*
3854     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3855   on each process, and then the end result by concatenating the local matrices.
3856   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3857 
3858   Note: This requires a sequential iscol with all indices.
3859 */
3860 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3861 {
3862   PetscErrorCode ierr;
3863   PetscMPIInt    rank,size;
3864   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3865   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3866   Mat            M,Mreuse;
3867   MatScalar      *aa,*vwork;
3868   MPI_Comm       comm;
3869   Mat_SeqAIJ     *aij;
3870   PetscBool      colflag,allcolumns=PETSC_FALSE;
3871 
3872   PetscFunctionBegin;
3873   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3874   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3875   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3876 
3877   /* Check for special case: each processor gets entire matrix columns */
3878   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3879   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3880   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3881   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3882 
3883   if (call ==  MAT_REUSE_MATRIX) {
3884     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3885     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3886     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3887   } else {
3888     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3889   }
3890 
3891   /*
3892       m - number of local rows
3893       n - number of columns (same on all processors)
3894       rstart - first row in new global matrix generated
3895   */
3896   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3897   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3898   if (call == MAT_INITIAL_MATRIX) {
3899     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3900     ii  = aij->i;
3901     jj  = aij->j;
3902 
3903     /*
3904         Determine the number of non-zeros in the diagonal and off-diagonal
3905         portions of the matrix in order to do correct preallocation
3906     */
3907 
3908     /* first get start and end of "diagonal" columns */
3909     if (csize == PETSC_DECIDE) {
3910       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3911       if (mglobal == n) { /* square matrix */
3912         nlocal = m;
3913       } else {
3914         nlocal = n/size + ((n % size) > rank);
3915       }
3916     } else {
3917       nlocal = csize;
3918     }
3919     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3920     rstart = rend - nlocal;
3921     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3922 
3923     /* next, compute all the lengths */
3924     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3925     olens = dlens + m;
3926     for (i=0; i<m; i++) {
3927       jend = ii[i+1] - ii[i];
3928       olen = 0;
3929       dlen = 0;
3930       for (j=0; j<jend; j++) {
3931         if (*jj < rstart || *jj >= rend) olen++;
3932         else dlen++;
3933         jj++;
3934       }
3935       olens[i] = olen;
3936       dlens[i] = dlen;
3937     }
3938     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3939     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3940     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3941     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3942     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3943     ierr = PetscFree(dlens);CHKERRQ(ierr);
3944   } else {
3945     PetscInt ml,nl;
3946 
3947     M    = *newmat;
3948     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3949     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3950     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3951     /*
3952          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3953        rather than the slower MatSetValues().
3954     */
3955     M->was_assembled = PETSC_TRUE;
3956     M->assembled     = PETSC_FALSE;
3957   }
3958   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3959   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3960   ii   = aij->i;
3961   jj   = aij->j;
3962 
3963   /* trigger copy to CPU if needed */
3964   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3965   for (i=0; i<m; i++) {
3966     row   = rstart + i;
3967     nz    = ii[i+1] - ii[i];
3968     cwork = jj; jj += nz;
3969     vwork = aa; aa += nz;
3970     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3971   }
3972   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3973 
3974   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   *newmat = M;
3977 
3978   /* save the submatrix on this process for a future MAT_REUSE_MATRIX request */
3979   if (call ==  MAT_INITIAL_MATRIX) {
3980     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3981     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3982   }
3983   PetscFunctionReturn(0);
3984 }
3985 
3986 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3987 {
3988   PetscInt       m,cstart, cend,j,nnz,i,d;
3989   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3990   const PetscInt *JJ;
3991   PetscErrorCode ierr;
3992   PetscBool      nooffprocentries;
3993 
3994   PetscFunctionBegin;
3995   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3996 
3997   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3998   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3999   m      = B->rmap->n;
4000   cstart = B->cmap->rstart;
4001   cend   = B->cmap->rend;
4002   rstart = B->rmap->rstart;
4003 
4004   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
4005 
4006   if (PetscDefined(USE_DEBUG)) {
4007     for (i=0; i<m; i++) {
4008       nnz = Ii[i+1]- Ii[i];
4009       JJ  = J + Ii[i];
4010       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
4011       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
4012       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4013     }
4014   }
4015 
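  /* count, for each local row, how many entries fall in the diagonal-block columns
     [cstart,cend) (d_nnz) and how many fall outside it (o_nnz) */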
4016   for (i=0; i<m; i++) {
4017     nnz     = Ii[i+1]- Ii[i];
4018     JJ      = J + Ii[i];
4019     nnz_max = PetscMax(nnz_max,nnz);
4020     d       = 0;
4021     for (j=0; j<nnz; j++) {
4022       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4023     }
4024     d_nnz[i] = d;
4025     o_nnz[i] = nnz - d;
4026   }
4027   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4028   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4029 
4030   for (i=0; i<m; i++) {
4031     ii   = i + rstart;
4032     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4033   }
4034   nooffprocentries    = B->nooffprocentries;
4035   B->nooffprocentries = PETSC_TRUE;
4036   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4037   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4038   B->nooffprocentries = nooffprocentries;
4039 
4040   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 /*@
4045    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4046    (the default parallel PETSc format).
4047 
4048    Collective
4049 
4050    Input Parameters:
4051 +  B - the matrix
4052 .  i - the indices into j for the start of each local row (starts with zero)
4053 .  j - the column indices for each local row (starts with zero)
4054 -  v - optional values in the matrix
4055 
4056    Level: developer
4057 
4058    Notes:
4059        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4060      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4061      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4062 
4063        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4064 
4065        The format used for the sparse matrix input is equivalent to a
4066     row-major ordering, i.e., for the following matrix, the expected input data is
4067     as shown:
4068 
4069 $        1 0 0
4070 $        2 0 3     P0
4071 $       -------
4072 $        4 5 6     P1
4073 $
4074 $     Process0 [P0]: rows_owned=[0,1]
4075 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4076 $        j =  {0,0,2}  [size = 3]
4077 $        v =  {1,2,3}  [size = 3]
4078 $
4079 $     Process1 [P1]: rows_owned=[2]
4080 $        i =  {0,3}    [size = nrow+1  = 1+1]
4081 $        j =  {0,1,2}  [size = 3]
4082 $        v =  {4,5,6}  [size = 3]
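
     A minimal calling sketch for the example above (per-process array contents as
     listed, with nrows_owned = 2 on P0 and 1 on P1; error checking omitted):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nrows_owned,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve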
4083 
4084 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4085           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4086 @*/
4087 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4088 {
4089   PetscErrorCode ierr;
4090 
4091   PetscFunctionBegin;
4092   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4093   PetscFunctionReturn(0);
4094 }
4095 
4096 /*@C
4097    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4098    (the default parallel PETSc format).  For good matrix assembly performance
4099    the user should preallocate the matrix storage by setting the parameters
4100    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4101    performance can be increased by more than a factor of 50.
4102 
4103    Collective
4104 
4105    Input Parameters:
4106 +  B - the matrix
4107 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4108            (same value is used for all local rows)
4109 .  d_nnz - array containing the number of nonzeros in the various rows of the
4110            DIAGONAL portion of the local submatrix (possibly different for each row)
4111            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4112            The size of this array is equal to the number of local rows, i.e 'm'.
4113            For matrices that will be factored, you must leave room for (and set)
4114            the diagonal entry even if it is zero.
4115 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4116            submatrix (same value is used for all local rows).
4117 -  o_nnz - array containing the number of nonzeros in the various rows of the
4118            OFF-DIAGONAL portion of the local submatrix (possibly different for
4119            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4120            structure. The size of this array is equal to the number
4121            of local rows, i.e 'm'.
4122 
4123    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4124 
4125    The AIJ format (also called the Yale sparse matrix format or
4126    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4127    storage.  The stored row and column indices begin with zero.
4128    See Users-Manual: ch_mat for details.
4129 
4130    The parallel matrix is partitioned such that the first m0 rows belong to
4131    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4132    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4133 
4134    The DIAGONAL portion of the local submatrix of a processor can be defined
4135    as the submatrix which is obtained by extracting the part corresponding to
4136    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4137    first row that belongs to the processor, r2 is the last row belonging to
4138    this processor, and c1-c2 is the range of indices of the local part of a
4139    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4140    common case of a square matrix, the row and column ranges are the same and
4141    the DIAGONAL part is also square. The remaining portion of the local
4142    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4143 
4146    You can call MatGetInfo() to get information on how effective the preallocation was;
4147    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
4148    You can also run with the option -info and look for messages with the string
4149    malloc in them to see if additional memory allocation was needed.
4150 
4151    Example usage:
4152 
4153    Consider the following 8x8 matrix with 34 non-zero values, that is
4154    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4155    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4156    as follows:
4157 
4158 .vb
4159             1  2  0  |  0  3  0  |  0  4
4160     Proc0   0  5  6  |  7  0  0  |  8  0
4161             9  0 10  | 11  0  0  | 12  0
4162     -------------------------------------
4163            13  0 14  | 15 16 17  |  0  0
4164     Proc1   0 18  0  | 19 20 21  |  0  0
4165             0  0  0  | 22 23  0  | 24  0
4166     -------------------------------------
4167     Proc2  25 26 27  |  0  0 28  | 29  0
4168            30  0  0  | 31 32 33  |  0 34
4169 .ve
4170 
4171    This can be represented as a collection of submatrices as:
4172 
4173 .vb
4174       A B C
4175       D E F
4176       G H I
4177 .ve
4178 
4179    Where the submatrices A,B,C are owned by proc0, D,E,F are
4180    owned by proc1, G,H,I are owned by proc2.
4181 
4182    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4183    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4184    The 'M','N' parameters are 8,8, and have the same values on all procs.
4185 
4186    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4187    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4188    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4189    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4190    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4191    matrix, and [DF] as another SeqAIJ matrix.
4192 
4193    When d_nz, o_nz parameters are specified, d_nz storage elements are
4194    allocated for every row of the local diagonal submatrix, and o_nz
4195    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4196    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4197    local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4198    In this case, the values of d_nz,o_nz are:
4199 .vb
4200      proc0 : dnz = 2, o_nz = 2
4201      proc1 : dnz = 3, o_nz = 2
4202      proc2 : dnz = 1, o_nz = 4
4203 .ve
4204    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4205    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4206    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4207    34 values.
4208 
4209    When d_nnz, o_nnz parameters are specified, the storage is specified
4210    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4211    In the above case the values for d_nnz,o_nnz are:
4212 .vb
4213      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4214      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4215      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4216 .ve
4217    Here the space allocated is the sum of all the above values, i.e., 34, and
4218    hence the preallocation is perfect.
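
   For instance, the matching call on proc0 in the d_nnz/o_nnz case above would be
   (a sketch; arrays as listed above):

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve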
4219 
4220    Level: intermediate
4221 
4222 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4223           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4224 @*/
4225 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4226 {
4227   PetscErrorCode ierr;
4228 
4229   PetscFunctionBegin;
4230   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4231   PetscValidType(B,1);
4232   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4233   PetscFunctionReturn(0);
4234 }
4235 
4236 /*@
4237      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4238          rows in standard CSR format.
4239 
4240    Collective
4241 
4242    Input Parameters:
4243 +  comm - MPI communicator
4244 .  m - number of local rows (Cannot be PETSC_DECIDE)
4245 .  n - This value should be the same as the local size used in creating the
4246        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4247        calculated if N is given). For square matrices n is almost always m.
4248 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4249 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4250 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4251 .   j - column indices
4252 -   a - matrix values
4253 
4254    Output Parameter:
4255 .   mat - the matrix
4256 
4257    Level: intermediate
4258 
4259    Notes:
4260        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4261      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4262      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4263 
4264        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4265 
4266        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4267 
4268        The format used for the sparse matrix input is equivalent to a
4269     row-major ordering, i.e., for the following matrix, the expected input data is
4270     as shown:
4271 
4272 $        1 0 0
4273 $        2 0 3     P0
4274 $       -------
4275 $        4 5 6     P1
4276 $
4277 $     Process0 [P0]: rows_owned=[0,1]
4278 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4279 $        j =  {0,0,2}  [size = 3]
4280 $        v =  {1,2,3}  [size = 3]
4281 $
4282 $     Process1 [P1]: rows_owned=[2]
4283 $        i =  {0,3}    [size = nrow+1  = 1+1]
4284 $        j =  {0,1,2}  [size = 3]
4285 $        v =  {4,5,6}  [size = 3]
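
     For example, process 0 above would create the matrix with (a sketch; error
     checking omitted):

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve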
4286 
4287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4288           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4289 @*/
4290 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4291 {
4292   PetscErrorCode ierr;
4293 
4294   PetscFunctionBegin;
4295   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4296   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4297   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4298   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4299   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4300   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4301   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4302   PetscFunctionReturn(0);
4303 }
4304 
4305 /*@
4306      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in
4307          standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used at creation.
4308 
4309    Collective
4310 
4311    Input Parameters:
4312 +  mat - the matrix
4313 .  m - number of local rows (Cannot be PETSC_DECIDE)
4314 .  n - This value should be the same as the local size used in creating the
4315        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4316        calculated if N is given). For square matrices n is almost always m.
4317 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4318 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4319 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4320 .  J - column indices
4321 -  v - matrix values
4322 
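   Notes:
     The matrix must have been created (e.g., with MatCreateMPIAIJWithArrays()) using the
     same Ii and J arrays; only the values in v may change between calls. A minimal sketch:

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&A);
     /* ... recompute the numerical values in v ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
.ve
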
4323    Level: intermediate
4324 
4325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4326           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4327 @*/
4328 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4329 {
4330   PetscErrorCode ierr;
4331   PetscInt       cstart,nnz,i,j;
4332   PetscInt       *ld;
4333   PetscBool      nooffprocentries;
4334   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4335   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4336   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4337   const PetscInt *Adi = Ad->i;
4338   PetscInt       ldi,Iii,md;
4339 
4340   PetscFunctionBegin;
4341   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4342   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4343   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4344   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4345 
4346   cstart = mat->cmap->rstart;
4347   if (!Aij->ld) {
4348     /* count number of entries below block diagonal */
4349     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4350     Aij->ld = ld;
4351     for (i=0; i<m; i++) {
4352       nnz  = Ii[i+1]- Ii[i];
4353       j     = 0;
4354       while (j < nnz && J[j] < cstart) j++; /* test j < nnz before reading J[j] to avoid an out-of-range access */
4355       J    += nnz;
4356       ld[i] = j;
4357     }
4358   } else {
4359     ld = Aij->ld;
4360   }
4361 
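  /* For each row, the CSR values v are ordered as: ld[i] off-diagonal entries whose
     columns lie below the diagonal block, then md diagonal-block entries, then the
     remaining off-diagonal entries; copy each piece into ao or ad accordingly */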
4362   for (i=0; i<m; i++) {
4363     nnz  = Ii[i+1]- Ii[i];
4364     Iii  = Ii[i];
4365     ldi  = ld[i];
4366     md   = Adi[i+1]-Adi[i];
4367     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4368     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4369     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4370     ad  += md;
4371     ao  += nnz - md;
4372   }
4373   nooffprocentries      = mat->nooffprocentries;
4374   mat->nooffprocentries = PETSC_TRUE;
4375   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4376   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4377   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4378   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4380   mat->nooffprocentries = nooffprocentries;
4381   PetscFunctionReturn(0);
4382 }
4383 
4384 /*@C
4385    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4386    (the default parallel PETSc format).  For good matrix assembly performance
4387    the user should preallocate the matrix storage by setting the parameters
4388    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4389    performance can be increased by more than a factor of 50.
4390 
4391    Collective
4392 
4393    Input Parameters:
4394 +  comm - MPI communicator
4395 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4396            This value should be the same as the local size used in creating the
4397            y vector for the matrix-vector product y = Ax.
4398 .  n - This value should be the same as the local size used in creating the
4399        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4400        calculated if N is given). For square matrices n is almost always m.
4401 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4402 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4403 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4404            (same value is used for all local rows)
4405 .  d_nnz - array containing the number of nonzeros in the various rows of the
4406            DIAGONAL portion of the local submatrix (possibly different for each row)
4407            or NULL, if d_nz is used to specify the nonzero structure.
4408            The size of this array is equal to the number of local rows, i.e 'm'.
4409 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4410            submatrix (same value is used for all local rows).
4411 -  o_nnz - array containing the number of nonzeros in the various rows of the
4412            OFF-DIAGONAL portion of the local submatrix (possibly different for
4413            each row) or NULL, if o_nz is used to specify the nonzero
4414            structure. The size of this array is equal to the number
4415            of local rows, i.e 'm'.
4416 
4417    Output Parameter:
4418 .  A - the matrix
4419 
4420    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4421    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4422    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4423 
4424    Notes:
4425    If the *_nnz parameter is given then the *_nz parameter is ignored
4426 
4427    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4428    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4429    storage requirements for this matrix.
4430 
4431    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4432    processor then it must be used on all processors that share the object for
4433    that argument.
4434 
4435    The user MUST specify either the local or global matrix dimensions
4436    (possibly both).
4437 
4438    The parallel matrix is partitioned across processors such that the
4439    first m0 rows belong to process 0, the next m1 rows belong to
4440    process 1, the next m2 rows belong to process 2, etc., where
4441    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4442    values corresponding to an [m x N] submatrix.
4443 
4444    The columns are logically partitioned with the n0 columns belonging
4445    to 0th partition, the next n1 columns belonging to the next
4446    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4447 
4448    The DIAGONAL portion of the local submatrix on any given processor
4449    is the submatrix corresponding to the rows and columns m,n owned by
4450    the given processor, i.e., the diagonal matrix on
4451    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4452    etc. The remaining portion of the local submatrix [m x (N-n)]
4453    constitutes the OFF-DIAGONAL portion. The example below better
4454    illustrates this concept.
4455 
4456    For a square global matrix we define each processor's diagonal portion
4457    to be its local rows and the corresponding columns (a square submatrix);
4458    each processor's off-diagonal portion encompasses the remainder of the
4459    local matrix (a rectangular submatrix).
4460 
4463    When calling this routine with a single process communicator, a matrix of
4464    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4465    type of communicator, use the construction mechanism
4466 .vb
4467      MatCreate(...,&A);
4468      MatSetType(A,MATMPIAIJ);
4469      MatSetSizes(A, m,n,M,N);
4470      MatMPIAIJSetPreallocation(A,...);
4471 .ve
4474 
4475    By default, this format uses inodes (identical nodes) when possible.
4476    We search for consecutive rows with the same nonzero structure, thereby
4477    reusing matrix information to achieve increased efficiency.
4478 
4479    Options Database Keys:
4480 +  -mat_no_inode  - Do not use inodes
4481 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4482 
4485    Example usage:
4486 
4487    Consider the following 8x8 matrix with 34 non-zero values, that is
4488    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4489    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4490    as follows:
4491 
4492 .vb
4493             1  2  0  |  0  3  0  |  0  4
4494     Proc0   0  5  6  |  7  0  0  |  8  0
4495             9  0 10  | 11  0  0  | 12  0
4496     -------------------------------------
4497            13  0 14  | 15 16 17  |  0  0
4498     Proc1   0 18  0  | 19 20 21  |  0  0
4499             0  0  0  | 22 23  0  | 24  0
4500     -------------------------------------
4501     Proc2  25 26 27  |  0  0 28  | 29  0
4502            30  0  0  | 31 32 33  |  0 34
4503 .ve
4504 
4505    This can be represented as a collection of submatrices as
4506    This can be represented as a collection of submatrices as:
4507 .vb
4508       A B C
4509       D E F
4510       G H I
4511 .ve
4512 
4513    Where the submatrices A,B,C are owned by proc0, D,E,F are
4514    owned by proc1, G,H,I are owned by proc2.
4515 
4516    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4517    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4518    The 'M','N' parameters are 8,8, and have the same values on all procs.
4519 
4520    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4521    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4522    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4523    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4524    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4525    matrix, and [DF] as another SeqAIJ matrix.
4526 
4527    When d_nz, o_nz parameters are specified, d_nz storage elements are
4528    allocated for every row of the local diagonal submatrix, and o_nz
4529    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4530    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4531    local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4532    In this case, the values of d_nz,o_nz are
4533 .vb
4534      proc0 : dnz = 2, o_nz = 2
4535      proc1 : dnz = 3, o_nz = 2
4536      proc2 : dnz = 1, o_nz = 4
4537 .ve
4538    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4539    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4540    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4541    34 values.
4542 
4543    When d_nnz, o_nnz parameters are specified, the storage is specified
4544    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4545    In the above case the values for d_nnz,o_nnz are
4546 .vb
4547      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4548      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4549      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4550 .ve
4551    Here the space allocated is the sum of all the above values, i.e., 34, and
4552    hence the preallocation is perfect.
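
   A matching creation call for proc1 in the d_nnz/o_nnz case above would be
   (a sketch; error checking omitted):

.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve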
4553 
4554    Level: intermediate
4555 
4556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4557           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4558 @*/
4559 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4560 {
4561   PetscErrorCode ierr;
4562   PetscMPIInt    size;
4563 
4564   PetscFunctionBegin;
4565   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4566   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4567   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4568   if (size > 1) {
4569     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4570     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4571   } else {
4572     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4573     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4574   }
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 /*@C
4579   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4580 
4581   Not collective
4582 
4583   Input Parameter:
4584 . A - The MPIAIJ matrix
4585 
4586   Output Parameters:
4587 + Ad - The local diagonal block as a SeqAIJ matrix
4588 . Ao - The local off-diagonal block as a SeqAIJ matrix
4589 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4590 
4591   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4592   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4593   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4594   local column numbers to global column numbers in the original matrix.
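
  A minimal access sketch:
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
  The returned matrices and array are internal to A; the caller must not destroy or free them.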
4595 
4596   Level: intermediate
4597 
4598 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4599 @*/
4600 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4601 {
4602   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4603   PetscBool      flg;
4604   PetscErrorCode ierr;
4605 
4606   PetscFunctionBegin;
4607   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4608   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4609   if (Ad)     *Ad     = a->A;
4610   if (Ao)     *Ao     = a->B;
4611   if (colmap) *colmap = a->garray;
4612   PetscFunctionReturn(0);
4613 }
4614 
4615 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4616 {
4617   PetscErrorCode ierr;
4618   PetscInt       m,N,i,rstart,nnz,Ii;
4619   PetscInt       *indx;
4620   PetscScalar    *values;
4621 
4622   PetscFunctionBegin;
4623   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4624   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4625     PetscInt       *dnz,*onz,sum,bs,cbs;
4626 
4627     if (n == PETSC_DECIDE) {
4628       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4629     }
4630     /* Check sum(n) = N */
4631     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4632     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4633 
4634     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4635     rstart -= m;
4636 
4637     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4638     for (i=0; i<m; i++) {
4639       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4640       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4641       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4642     }
4643 
4644     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4645     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4646     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4647     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4648     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4649     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4650     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4651     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4652   }
4653 
4654   /* numeric phase */
4655   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4656   for (i=0; i<m; i++) {
4657     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4658     Ii   = i + rstart;
4659     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4660     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4661   }
4662   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4663   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4664   PetscFunctionReturn(0);
4665 }
4666 
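/* Developer utility: each process writes its local rows of A, as a sequential matrix,
   to the binary file <outfile>.<rank> (opened in append mode) */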
4667 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4668 {
4669   PetscErrorCode    ierr;
4670   PetscMPIInt       rank;
4671   PetscInt          m,N,i,rstart,nnz;
4672   size_t            len;
4673   const PetscInt    *indx;
4674   PetscViewer       out;
4675   char              *name;
4676   Mat               B;
4677   const PetscScalar *values;
4678 
4679   PetscFunctionBegin;
4680   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4681   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4682   /* Should this be the type of the diagonal block of A? */
4683   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4684   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4685   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4686   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4687   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4688   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4689   for (i=0; i<m; i++) {
4690     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4691     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4692     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4693   }
4694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4696 
4697   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4698   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4699   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4700   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4701   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4702   ierr = PetscFree(name);CHKERRQ(ierr);
4703   ierr = MatView(B,out);CHKERRQ(ierr);
4704   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4705   ierr = MatDestroy(&B);CHKERRQ(ierr);
4706   PetscFunctionReturn(0);
4707 }
4708 
4709 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4710 {
4711   PetscErrorCode      ierr;
4712   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4713 
4714   PetscFunctionBegin;
4715   if (!merge) PetscFunctionReturn(0);
4716   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4717   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4718   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4719   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4720   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4721   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4722   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4723   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4724   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4725   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4726   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4727   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4728   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4729   ierr = PetscFree(merge);CHKERRQ(ierr);
4730   PetscFunctionReturn(0);
4731 }
4732 
4733 #include <../src/mat/utils/freespace.h>
4734 #include <petscbt.h>
4735 
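/*
   MatCreateMPIAIJSumSeqAIJSymbolic() and MatCreateMPIAIJSumSeqAIJNumeric() are used as
   a pair: the symbolic routine builds the parallel nonzero structure once, and the
   numeric routine may then be called repeatedly as the numerical entries of seqmat
   change. A typical sequence (a sketch):

     MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,&mpimat);
     MatCreateMPIAIJSumSeqAIJNumeric(seqmat,mpimat);
*/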
4736 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4737 {
4738   PetscErrorCode      ierr;
4739   MPI_Comm            comm;
4740   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4741   PetscMPIInt         size,rank,taga,*len_s;
4742   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4743   PetscInt            proc,m;
4744   PetscInt            **buf_ri,**buf_rj;
4745   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4746   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4747   MPI_Request         *s_waits,*r_waits;
4748   MPI_Status          *status;
4749   MatScalar           *aa=a->a;
4750   MatScalar           **abuf_r,*ba_i;
4751   Mat_Merge_SeqsToMPI *merge;
4752   PetscContainer      container;
4753 
4754   PetscFunctionBegin;
4755   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4756   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4757 
4758   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4759   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4760 
4761   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4762   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4763   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4764 
4765   bi     = merge->bi;
4766   bj     = merge->bj;
4767   buf_ri = merge->buf_ri;
4768   buf_rj = merge->buf_rj;
4769 
4770   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4771   owners = merge->rowmap->range;
4772   len_s  = merge->len_s;
4773 
4774   /* send and recv matrix values */
4775   /*-----------------------------*/
4776   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4777   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4778 
4779   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4780   for (proc=0,k=0; proc<size; proc++) {
4781     if (!len_s[proc]) continue;
4782     i    = owners[proc];
4783     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4784     k++;
4785   }
4786 
4787   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4788   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4789   ierr = PetscFree(status);CHKERRQ(ierr);
4790 
4791   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4792   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4793 
4794   /* insert mat values of mpimat */
4795   /*----------------------------*/
4796   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4797   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4798 
4799   for (k=0; k<merge->nrecv; k++) {
4800     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4801     nrows       = *(buf_ri_k[k]);
4802     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4803     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4804   }
4805 
4806   /* set values of ba */
4807   m = merge->rowmap->n;
4808   for (i=0; i<m; i++) {
4809     arow = owners[rank] + i;
4810     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4811     bnzi = bi[i+1] - bi[i];
4812     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4813 
4814     /* add local non-zero vals of this proc's seqmat into ba */
4815     anzi   = ai[arow+1] - ai[arow];
4816     aj     = a->j + ai[arow];
4817     aa     = a->a + ai[arow];
4818     nextaj = 0;
4819     for (j=0; nextaj<anzi; j++) {
4820       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4821         ba_i[j] += aa[nextaj++];
4822       }
4823     }
4824 
4825     /* add received vals into ba */
4826     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4827       /* i-th row */
4828       if (i == *nextrow[k]) {
4829         anzi   = *(nextai[k]+1) - *nextai[k];
4830         aj     = buf_rj[k] + *(nextai[k]);
4831         aa     = abuf_r[k] + *(nextai[k]);
4832         nextaj = 0;
4833         for (j=0; nextaj<anzi; j++) {
4834           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4835             ba_i[j] += aa[nextaj++];
4836           }
4837         }
4838         nextrow[k]++; nextai[k]++;
4839       }
4840     }
4841     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4842   }
4843   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4844   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4845 
4846   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4847   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4848   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4849   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4850   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4851   PetscFunctionReturn(0);
4852 }
4853 
4854 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4855 {
4856   PetscErrorCode      ierr;
4857   Mat                 B_mpi;
4858   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4859   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4860   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4861   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4862   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4863   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4864   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4865   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4866   MPI_Status          *status;
4867   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4868   PetscBT             lnkbt;
4869   Mat_Merge_SeqsToMPI *merge;
4870   PetscContainer      container;
4871 
4872   PetscFunctionBegin;
4873   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4874 
4875   /* make sure it is a PETSc comm */
4876   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4877   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4878   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4879 
4880   ierr = PetscNew(&merge);CHKERRQ(ierr);
4881   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4882 
4883   /* determine row ownership */
4884   /*---------------------------------------------------------*/
4885   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4886   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4887   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4888   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4889   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4890   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4891   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4892 
4893   m      = merge->rowmap->n;
4894   owners = merge->rowmap->range;
4895 
4896   /* determine the number of messages to send, their lengths */
4897   /*---------------------------------------------------------*/
4898   len_s = merge->len_s;
4899 
4900   len          = 0; /* length of buf_si[] */
4901   merge->nsend = 0;
4902   for (proc=0; proc<size; proc++) {
4903     len_si[proc] = 0;
4904     if (proc == rank) {
4905       len_s[proc] = 0;
4906     } else {
4907       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4908       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4909     }
4910     if (len_s[proc]) {
4911       merge->nsend++;
4912       nrows = 0;
4913       for (i=owners[proc]; i<owners[proc+1]; i++) {
4914         if (ai[i+1] > ai[i]) nrows++;
4915       }
4916       len_si[proc] = 2*(nrows+1);
4917       len         += len_si[proc];
4918     }
4919   }
4920 
4921   /* determine the number and length of messages to receive for ij-structure */
4922   /*-------------------------------------------------------------------------*/
4923   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4924   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4925 
4926   /* post the Irecv of j-structure */
4927   /*-------------------------------*/
4928   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4929   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4930 
4931   /* post the Isend of j-structure */
4932   /*--------------------------------*/
4933   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4934 
4935   for (proc=0, k=0; proc<size; proc++) {
4936     if (!len_s[proc]) continue;
4937     i    = owners[proc];
4938     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4939     k++;
4940   }
4941 
4942   /* receives and sends of j-structure are complete */
4943   /*------------------------------------------------*/
4944   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4945   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4946 
4947   /* send and recv i-structure */
4948   /*---------------------------*/
4949   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4950   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4951 
4952   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4953   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4954   for (proc=0,k=0; proc<size; proc++) {
4955     if (!len_s[proc]) continue;
4956     /* form outgoing message for i-structure:
4957          buf_si[0]:                 nrows to be sent
4958                [1:nrows]:           row index (global)
4959                [nrows+1:2*nrows+1]: i-structure index
4960     */
4961     /*-------------------------------------------*/
4962     nrows       = len_si[proc]/2 - 1;
4963     buf_si_i    = buf_si + nrows+1;
4964     buf_si[0]   = nrows;
4965     buf_si_i[0] = 0;
4966     nrows       = 0;
4967     for (i=owners[proc]; i<owners[proc+1]; i++) {
4968       anzi = ai[i+1] - ai[i];
4969       if (anzi) {
4970         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4971         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4972         nrows++;
4973       }
4974     }
4975     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4976     k++;
4977     buf_si += len_si[proc];
4978   }
4979 
4980   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4981   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4982 
4983   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4984   for (i=0; i<merge->nrecv; i++) {
4985     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4986   }
4987 
4988   ierr = PetscFree(len_si);CHKERRQ(ierr);
4989   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4990   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4991   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4992   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4993   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4994   ierr = PetscFree(status);CHKERRQ(ierr);
4995 
4996   /* compute a local seq matrix in each processor */
4997   /*----------------------------------------------*/
4998   /* allocate bi array and free space for accumulating nonzero column info */
4999   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
5000   bi[0] = 0;
5001 
5002   /* create and initialize a linked list */
5003   nlnk = N+1;
5004   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5005 
5006   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5007   len  = ai[owners[rank+1]] - ai[owners[rank]];
5008   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
5009 
5010   current_space = free_space;
5011 
5012   /* determine symbolic info for each local row */
5013   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5014 
5015   for (k=0; k<merge->nrecv; k++) {
5016     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5017     nrows       = *buf_ri_k[k];
5018     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5019     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
5020   }
5021 
5022   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5023   len  = 0;
5024   for (i=0; i<m; i++) {
5025     bnzi = 0;
5026     /* add local non-zero cols of this proc's seqmat into lnk */
5027     arow  = owners[rank] + i;
5028     anzi  = ai[arow+1] - ai[arow];
5029     aj    = a->j + ai[arow];
5030     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5031     bnzi += nlnk;
5032     /* add received col data into lnk */
5033     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5034       if (i == *nextrow[k]) { /* i-th row */
5035         anzi  = *(nextai[k]+1) - *nextai[k];
5036         aj    = buf_rj[k] + *nextai[k];
5037         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5038         bnzi += nlnk;
5039         nextrow[k]++; nextai[k]++;
5040       }
5041     }
5042     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5043 
5044     /* if free space is not available, make more free space */
5045     if (current_space->local_remaining<bnzi) {
5046       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5047       nspacedouble++;
5048     }
5049     /* copy data into free space, then initialize lnk */
5050     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5051     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5052 
5053     current_space->array           += bnzi;
5054     current_space->local_used      += bnzi;
5055     current_space->local_remaining -= bnzi;
5056 
5057     bi[i+1] = bi[i] + bnzi;
5058   }
5059 
5060   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5061 
5062   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5063   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5064   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5065 
5066   /* create symbolic parallel matrix B_mpi */
5067   /*---------------------------------------*/
5068   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5069   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5070   if (n==PETSC_DECIDE) {
5071     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5072   } else {
5073     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5074   }
5075   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5076   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5077   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5078   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5079   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5080 
5081   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5082   B_mpi->assembled  = PETSC_FALSE;
5083   merge->bi         = bi;
5084   merge->bj         = bj;
5085   merge->buf_ri     = buf_ri;
5086   merge->buf_rj     = buf_rj;
5087   merge->coi        = NULL;
5088   merge->coj        = NULL;
5089   merge->owners_co  = NULL;
5090 
5091   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5092 
5093   /* attach the supporting struct to B_mpi for reuse */
5094   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5095   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5096   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5097   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5098   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5099   *mpimat = B_mpi;
5100 
5101   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 /*@C
5106       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5107                  matrices from each processor
5108 
5109     Collective
5110 
5111    Input Parameters:
5112 +    comm - the communicator the parallel matrix will live on
5113 .    seqmat - the input sequential matrix (one per process)
5114 .    m - number of local rows (or PETSC_DECIDE)
5115 .    n - number of local columns (or PETSC_DECIDE)
5116 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5117 
5118    Output Parameter:
5119 .    mpimat - the parallel matrix generated
5120 
5121     Level: advanced
5122 
5123    Notes:
5124      The dimensions of the sequential matrix in each processor MUST be the same.
5125      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
5126      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5127 @*/
5128 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5129 {
5130   PetscErrorCode ierr;
5131   PetscMPIInt    size;
5132 
5133   PetscFunctionBegin;
5134   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5135   if (size == 1) {
5136     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5137     if (scall == MAT_INITIAL_MATRIX) {
5138       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5139     } else {
5140       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5141     }
5142     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5143     PetscFunctionReturn(0);
5144   }
5145   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5146   if (scall == MAT_INITIAL_MATRIX) {
5147     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5148   }
5149   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5150   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5151   PetscFunctionReturn(0);
5152 }
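
/* Illustrative usage sketch (not part of the library source; `seq` and `par` are
   hypothetical variables): each rank assembles its own SeqAIJ matrix of identical
   global dimensions, and the ranks' contributions are summed into one MATMPIAIJ matrix:

     Mat seq,par;
     ... assemble seq on PETSC_COMM_SELF ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     ... change the numerical values of seq, keeping its nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
     ierr = MatDestroy(&par);CHKERRQ(ierr);
*/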
5153 
5154 /*@
5155      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5156           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5157           with MatGetSize()
5158 
5159     Not Collective
5160 
5161    Input Parameters:
5162 +    A - the matrix
5163 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5164 
5165    Output Parameter:
5166 .    A_loc - the local sequential matrix generated
5167 
5168     Level: developer
5169 
5170    Notes:
5171      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5172      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5173      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5174      modify the values of the returned A_loc.
5175 
5176 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5177 @*/
5178 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5179 {
5180   PetscErrorCode ierr;
5181   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5182   Mat_SeqAIJ     *mat,*a,*b;
5183   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5184   MatScalar      *aa,*ba,*cam;
5185   PetscScalar    *ca;
5186   PetscMPIInt    size;
5187   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5188   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5189   PetscBool      match;
5190 
5191   PetscFunctionBegin;
5192   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5193   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5194   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5195   if (size == 1) {
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5198       *A_loc = mpimat->A;
5199     } else if (scall == MAT_REUSE_MATRIX) {
5200       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5201     }
5202     PetscFunctionReturn(0);
5203   }
5204 
5205   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5206   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5207   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5208   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5209   aa = a->a; ba = b->a;
5210   if (scall == MAT_INITIAL_MATRIX) {
5211     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5212     ci[0] = 0;
5213     for (i=0; i<am; i++) {
5214       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5215     }
5216     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5217     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5218     k    = 0;
5219     for (i=0; i<am; i++) {
5220       ncols_o = bi[i+1] - bi[i];
5221       ncols_d = ai[i+1] - ai[i];
5222       /* off-diagonal portion of A */
5223       for (jo=0; jo<ncols_o; jo++) {
5224         col = cmap[*bj];
5225         if (col >= cstart) break;
5226         cj[k]   = col; bj++;
5227         ca[k++] = *ba++;
5228       }
5229       /* diagonal portion of A */
5230       for (j=0; j<ncols_d; j++) {
5231         cj[k]   = cstart + *aj++;
5232         ca[k++] = *aa++;
5233       }
5234       /* off-diagonal portion of A */
5235       for (j=jo; j<ncols_o; j++) {
5236         cj[k]   = cmap[*bj++];
5237         ca[k++] = *ba++;
5238       }
5239     }
5240     /* put together the new matrix */
5241     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5242     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5243     /* Since these are PETSc arrays, change flags to free them as necessary. */
5244     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5245     mat->free_a  = PETSC_TRUE;
5246     mat->free_ij = PETSC_TRUE;
5247     mat->nonew   = 0;
5248   } else if (scall == MAT_REUSE_MATRIX) {
5249     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5250     ci = mat->i; cj = mat->j; cam = mat->a;
5251     for (i=0; i<am; i++) {
5252       /* off-diagonal portion of A */
5253       ncols_o = bi[i+1] - bi[i];
5254       for (jo=0; jo<ncols_o; jo++) {
5255         col = cmap[*bj];
5256         if (col >= cstart) break;
5257         *cam++ = *ba++; bj++;
5258       }
5259       /* diagonal portion of A */
5260       ncols_d = ai[i+1] - ai[i];
5261       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5262       /* off-diagonal portion of A */
5263       for (j=jo; j<ncols_o; j++) {
5264         *cam++ = *ba++; bj++;
5265       }
5266     }
5267   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5268   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5269   PetscFunctionReturn(0);
5270 }
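
/* Illustrative usage sketch (not part of the library source; `A` is a MATMPIAIJ matrix,
   `Aloc` is a hypothetical variable): extract the local rows once, then refresh only
   the values after A changes:

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... A is modified without changing its nonzero pattern ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/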
5271 
5272 /*@
5273      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5274           mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5275 
5276     Not Collective
5277 
5278    Input Parameters:
5279 +    A - the matrix
5280 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5281 
5282    Output Parameters:
5283 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5284 -    A_loc - the local sequential matrix generated
5285 
5286     Level: developer
5287 
5288    Notes:
5289      This is different from MatMPIAIJGetLocalMat since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5290 
5291 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5292 
5293 @*/
5294 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5295 {
5296   PetscErrorCode ierr;
5297   Mat            Ao,Ad;
5298   const PetscInt *cmap;
5299   PetscMPIInt    size;
5300   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5301 
5302   PetscFunctionBegin;
5303   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5305   if (size == 1) {
5306     if (scall == MAT_INITIAL_MATRIX) {
5307       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5308       *A_loc = Ad;
5309     } else if (scall == MAT_REUSE_MATRIX) {
5310       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5311     }
5312     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5313     PetscFunctionReturn(0);
5314   }
5315   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5316   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5317   if (f) {
5318     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5319   } else {
5320     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5321     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5322     Mat_SeqAIJ        *c;
5323     PetscInt          *ai = a->i, *aj = a->j;
5324     PetscInt          *bi = b->i, *bj = b->j;
5325     PetscInt          *ci,*cj;
5326     const PetscScalar *aa,*ba;
5327     PetscScalar       *ca;
5328     PetscInt          i,j,am,dn,on;
5329 
5330     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5331     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5332     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5333     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5334     if (scall == MAT_INITIAL_MATRIX) {
5335       PetscInt k;
5336       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5337       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5338       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5339       ci[0] = 0;
5340       for (i=0,k=0; i<am; i++) {
5341         const PetscInt ncols_o = bi[i+1] - bi[i];
5342         const PetscInt ncols_d = ai[i+1] - ai[i];
5343         ci[i+1] = ci[i] + ncols_o + ncols_d;
5344         /* diagonal portion of A */
5345         for (j=0; j<ncols_d; j++,k++) {
5346           cj[k] = *aj++;
5347           ca[k] = *aa++;
5348         }
5349         /* off-diagonal portion of A */
5350         for (j=0; j<ncols_o; j++,k++) {
5351           cj[k] = dn + *bj++;
5352           ca[k] = *ba++;
5353         }
5354       }
5355       /* put together the new matrix */
5356       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5357       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5358       /* Since these are PETSc arrays, change flags to free them as necessary. */
5359       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5360       c->free_a  = PETSC_TRUE;
5361       c->free_ij = PETSC_TRUE;
5362       c->nonew   = 0;
5363       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5364     } else if (scall == MAT_REUSE_MATRIX) {
5365 #if defined(PETSC_HAVE_DEVICE)
5366       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5367 #endif
5368       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5369       ca = c->a;
5370       for (i=0; i<am; i++) {
5371         const PetscInt ncols_d = ai[i+1] - ai[i];
5372         const PetscInt ncols_o = bi[i+1] - bi[i];
5373         /* diagonal portion of A */
5374         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5375         /* off-diagonal portion of A */
5376         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5377       }
5378     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5379     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5380     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5381     if (glob) {
5382       PetscInt cst, *gidx;
5383 
5384       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5385       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5386       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5387       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5388       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5389     }
5390   }
5391   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5392   PetscFunctionReturn(0);
5393 }
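
/* Illustrative usage sketch (not part of the library source; `A`, `Aloc`, and `glob`
   are hypothetical variables): the columns of Aloc are ordered diagonal part first,
   then off-diagonal part, and `glob` translates that local column ordering back to
   global column indices of A:

     Mat Aloc;
     IS  glob;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&Aloc);CHKERRQ(ierr);
     ... use ISGetIndices(glob,...) to map local columns of Aloc to global columns of A ...
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/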
5394 
5395 /*@C
5396      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5397 
5398     Not Collective
5399 
5400    Input Parameters:
5401 +    A - the matrix
5402 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5403 -    row, col - index sets of rows and columns to extract (or NULL)
5404 
5405    Output Parameter:
5406 .    A_loc - the local sequential matrix generated
5407 
5408     Level: developer
5409 
5410 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5411 
5412 @*/
5413 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5414 {
5415   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5416   PetscErrorCode ierr;
5417   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5418   IS             isrowa,iscola;
5419   Mat            *aloc;
5420   PetscBool      match;
5421 
5422   PetscFunctionBegin;
5423   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5424   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5425   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5426   if (!row) {
5427     start = A->rmap->rstart; end = A->rmap->rend;
5428     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5429   } else {
5430     isrowa = *row;
5431   }
5432   if (!col) {
5433     start = A->cmap->rstart;
5434     cmap  = a->garray;
5435     nzA   = a->A->cmap->n;
5436     nzB   = a->B->cmap->n;
5437     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5438     ncols = 0;
5439     for (i=0; i<nzB; i++) {
5440       if (cmap[i] < start) idx[ncols++] = cmap[i];
5441       else break;
5442     }
5443     imark = i;
5444     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5445     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5446     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5447   } else {
5448     iscola = *col;
5449   }
5450   if (scall != MAT_INITIAL_MATRIX) {
5451     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5452     aloc[0] = *A_loc;
5453   }
5454   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5455   if (!col) { /* attach global id of condensed columns */
5456     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5457   }
5458   *A_loc = aloc[0];
5459   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5460   if (!row) {
5461     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5462   }
5463   if (!col) {
5464     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5465   }
5466   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5467   PetscFunctionReturn(0);
5468 }
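
/* Illustrative usage sketch (not part of the library source; `A` and `Aloc` are
   hypothetical variables): extract the local rows restricted to the columns that
   actually carry nonzeros; passing NULL for the row/col index sets lets the routine
   construct them itself:

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/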
5469 
5470 /*
5471  * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
5472  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5473  * on a global size.
5474  * */
5475 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5476 {
5477   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5478   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5479   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5480   PetscMPIInt              owner;
5481   PetscSFNode              *iremote,*oiremote;
5482   const PetscInt           *lrowindices;
5483   PetscErrorCode           ierr;
5484   PetscSF                  sf,osf;
5485   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5486   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5487   MPI_Comm                 comm;
5488   ISLocalToGlobalMapping   mapping;
5489 
5490   PetscFunctionBegin;
5491   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5492   /* plocalsize is the number of roots
5493    * nrows is the number of leaves
5494    * */
5495   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5496   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5497   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5498   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5499   for (i=0;i<nrows;i++) {
5500     /* Find a remote index and an owner for a row
5501      * The row could be local or remote
5502      * */
5503     owner = 0;
5504     lidx  = 0;
5505     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5506     iremote[i].index = lidx;
5507     iremote[i].rank  = owner;
5508   }
5509   /* Create SF to communicate how many nonzero columns for each row */
5510   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5511   /* SF will figure out the number of nonzero columns for each row, and their
5512    * offsets
5513    * */
5514   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5515   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5516   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5517 
5518   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5519   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5520   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5521   roffsets[0] = 0;
5522   roffsets[1] = 0;
5523   for (i=0;i<plocalsize;i++) {
5524     /* diag */
5525     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5526     /* off diag */
5527     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5528     /* compute offsets so that we know the relative location of each row */
5529     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5530     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5531   }
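  /* Illustrative example (not part of the algorithm): for two local rows whose
     (diagonal, off-diagonal) nonzero counts are (2,1) and (0,3), the interleaved
     arrays become
         nrcols   = { 2, 1, 0, 3 }
         roffsets = { 0, 0, 2, 1, 2, 4 }
     so entry 2*i (resp. 2*i+1) of each pair refers to the diagonal (resp.
     off-diagonal) block of row i. */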
5532   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5533   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5534   /* 'r' means root, and 'l' means leaf */
5535   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5536   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5537   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5538   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5539   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5540   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5541   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5542   dntotalcols = 0;
5543   ontotalcols = 0;
5544   ncol = 0;
5545   for (i=0;i<nrows;i++) {
5546     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5547     ncol = PetscMax(pnnz[i],ncol);
5548     /* diag */
5549     dntotalcols += nlcols[i*2+0];
5550     /* off diag */
5551     ontotalcols += nlcols[i*2+1];
5552   }
5553   /* We do not need to figure out the right number of columns
5554    * since all the calculations will be done by going through the raw data
5555    * */
5556   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5557   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5558   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5559   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5560   /* diag */
5561   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5562   /* off diag */
5563   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5564   /* diag */
5565   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5566   /* off diag */
5567   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5568   dntotalcols = 0;
5569   ontotalcols = 0;
5570   ntotalcols  = 0;
5571   for (i=0;i<nrows;i++) {
5572     owner = 0;
5573     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5574     /* Set iremote for diag matrix */
5575     for (j=0;j<nlcols[i*2+0];j++) {
5576       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5577       iremote[dntotalcols].rank    = owner;
5578       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5579       ilocal[dntotalcols++]        = ntotalcols++;
5580     }
5581     /* off diag */
5582     for (j=0;j<nlcols[i*2+1];j++) {
5583       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5584       oiremote[ontotalcols].rank    = owner;
5585       oilocal[ontotalcols++]        = ntotalcols++;
5586     }
5587   }
5588   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5589   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5590   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5591   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5592   /* P serves as roots and P_oth is leaves
5593    * Diag matrix
5594    * */
5595   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5596   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5597   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5598 
5599   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5600   /* Off diag */
5601   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5602   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5603   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5604   /* We operate on the matrix internal data for saving memory */
5605   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5606   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5607   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5608   /* Convert to global indices for diag matrix */
5609   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5610   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5611   /* We want P_oth to store global indices */
5612   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5613   /* Use memory scalable approach */
5614   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5615   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5616   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5617   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5618   /* Convert back to local indices */
5619   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5620   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5621   nout = 0;
5622   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5623   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5624   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5625   /* Exchange values */
5626   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5627   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5628   /* Stop PETSc from shrinking memory */
5629   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5630   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5631   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5633   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5634   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5635   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5636   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5637   PetscFunctionReturn(0);
5638 }
5639 
5640 /*
5641  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5642  * This supports MPIAIJ and MAIJ
5643  * */
5644 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5645 {
5646   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5647   Mat_SeqAIJ            *p_oth;
5648   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5649   IS                    rows,map;
5650   PetscHMapI            hamp;
5651   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5652   MPI_Comm              comm;
5653   PetscSF               sf,osf;
5654   PetscBool             has;
5655   PetscErrorCode        ierr;
5656 
5657   PetscFunctionBegin;
5658   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5659   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5660   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5661    *  and then create a submatrix (that often is an overlapping matrix)
5662    * */
5663   if (reuse == MAT_INITIAL_MATRIX) {
5664     /* Use a hash table to figure out unique keys */
5665     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5666     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5667     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5668     count = 0;
5669     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5670     for (i=0;i<a->B->cmap->n;i++) {
5671       key  = a->garray[i]/dof;
5672       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5673       if (!has) {
5674         mapping[i] = count;
5675         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5676       } else {
5677         /* Current 'i' has the same value as in the previous step */
5678         mapping[i] = count-1;
5679       }
5680     }
5681     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5682     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5683     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5684     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5685     off = 0;
5686     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5687     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5688     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5689     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5690     /* In case the matrix was already created, but the user wants to recreate it */
5691     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5692     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5693     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5694     ierr = ISDestroy(&map);CHKERRQ(ierr);
5695     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5696   } else if (reuse == MAT_REUSE_MATRIX) {
5697     /* If matrix was already created, we simply update values using SF objects
5698      * that were attached to the matrix earlier.
5699      *  */
5700     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5701     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5702     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5703     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5704     /* Update values in place */
5705     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5706     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5707     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5708     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5709   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5710   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5711   PetscFunctionReturn(0);
5712 }
5713 
5714 /*@C
5715     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that equal nonzero columns of local A
5716 
5717     Collective on Mat
5718 
5719    Input Parameters:
5720 +    A,B - the matrices in mpiaij format
5721 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5722 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5723 
5724    Output Parameters:
5725 +    rowb, colb - index sets of rows and columns of B to extract
5726 -    B_seq - the sequential matrix generated
5727 
5728     Level: developer
5729 
5730 @*/
5731 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5732 {
5733   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5734   PetscErrorCode ierr;
5735   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5736   IS             isrowb,iscolb;
5737   Mat            *bseq=NULL;
5738 
5739   PetscFunctionBegin;
5740   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5741     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5742   }
5743   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5744 
5745   if (scall == MAT_INITIAL_MATRIX) {
5746     start = A->cmap->rstart;
5747     cmap  = a->garray;
5748     nzA   = a->A->cmap->n;
5749     nzB   = a->B->cmap->n;
5750     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5751     ncols = 0;
5752     for (i=0; i<nzB; i++) {  /* row < local row index */
5753       if (cmap[i] < start) idx[ncols++] = cmap[i];
5754       else break;
5755     }
5756     imark = i;
5757     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5758     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5759     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5760     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5761   } else {
5762     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5763     isrowb  = *rowb; iscolb = *colb;
5764     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5765     bseq[0] = *B_seq;
5766   }
5767   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5768   *B_seq = bseq[0];
5769   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5770   if (!rowb) {
5771     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5772   } else {
5773     *rowb = isrowb;
5774   }
5775   if (!colb) {
5776     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5777   } else {
5778     *colb = iscolb;
5779   }
5780   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5781   PetscFunctionReturn(0);
5782 }
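
/* Illustrative usage sketch (not part of the library source; variable names are
   hypothetical): on first use the index sets are created by the routine and must be
   kept for the MAT_REUSE_MATRIX call:

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ... B is modified without changing its nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/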
5783 
5784 /*
5785     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that equal nonzero columns
5786     of the OFF-DIAGONAL portion of local A
5787 
5788     Collective on Mat
5789 
5790    Input Parameters:
5791 +    A,B - the matrices in mpiaij format
5792 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5793 
5794    Output Parameters:
5795 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5796 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5797 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5798 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5799 
5800     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5801      for this matrix. This is not desirable.
5802 
5803     Level: developer
5804 
5805 */
5806 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5807 {
5808   PetscErrorCode         ierr;
5809   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5810   Mat_SeqAIJ             *b_oth;
5811   VecScatter             ctx;
5812   MPI_Comm               comm;
5813   const PetscMPIInt      *rprocs,*sprocs;
5814   const PetscInt         *srow,*rstarts,*sstarts;
5815   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5816   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5817   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5818   MPI_Request            *rwaits = NULL,*swaits = NULL;
5819   MPI_Status             rstatus;
5820   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5821   PETSC_UNUSED PetscMPIInt jj;
5822 
5823   PetscFunctionBegin;
5824   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5825   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5826 
5827   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5828     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5829   }
5830   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5831   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5832 
5833   if (size == 1) {
5834     if (startsj_s) *startsj_s = NULL;
5835     if (bufa_ptr)  *bufa_ptr  = NULL;
5836     *B_oth    = NULL;
5837     PetscFunctionReturn(0);
5838   }
5839 
5840   ctx = a->Mvctx;
5841   tag = ((PetscObject)ctx)->tag;
5842 
5843   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5844   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5845   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5846   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5847   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5848   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5849 
5850   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5851   if (scall == MAT_INITIAL_MATRIX) {
5852     /* i-array */
5853     /*---------*/
5854     /*  post receives */
5855     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5856     for (i=0; i<nrecvs; i++) {
5857       rowlen = rvalues + rstarts[i]*rbs;
5858       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5859       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5860     }
5861 
5862     /* pack the outgoing message */
5863     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5864 
5865     sstartsj[0] = 0;
5866     rstartsj[0] = 0;
5867     len         = 0; /* total length of j or a array to be sent */
5868     if (nsends) {
5869       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5870       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5871     }
5872     for (i=0; i<nsends; i++) {
5873       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5874       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5875       for (j=0; j<nrows; j++) {
5876         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5877         for (l=0; l<sbs; l++) {
5878           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5879 
5880           rowlen[j*sbs+l] = ncols;
5881 
5882           len += ncols;
5883           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5884         }
5885         k++;
5886       }
5887       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5888 
5889       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5890     }
5891     /* recvs and sends of i-array are completed */
5892     i = nrecvs;
5893     while (i--) {
5894       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5895     }
5896     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5897     ierr = PetscFree(svalues);CHKERRQ(ierr);
5898 
5899     /* allocate buffers for sending j and a arrays */
5900     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5901     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5902 
5903     /* create i-array of B_oth */
5904     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5905 
5906     b_othi[0] = 0;
5907     len       = 0; /* total length of j or a array to be received */
5908     k         = 0;
5909     for (i=0; i<nrecvs; i++) {
5910       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5911       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5912       for (j=0; j<nrows; j++) {
5913         b_othi[k+1] = b_othi[k] + rowlen[j];
5914         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5915         k++;
5916       }
5917       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5918     }
5919     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5920 
5921     /* allocate space for j and a arrays of B_oth */
5922     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5923     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5924 
5925     /* j-array */
5926     /*---------*/
5927     /*  post receives of j-array */
5928     for (i=0; i<nrecvs; i++) {
5929       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5930       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5931     }
5932 
5933     /* pack the outgoing message j-array */
5934     if (nsends) k = sstarts[0];
5935     for (i=0; i<nsends; i++) {
5936       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5937       bufJ  = bufj+sstartsj[i];
5938       for (j=0; j<nrows; j++) {
5939         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5940         for (ll=0; ll<sbs; ll++) {
5941           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5942           for (l=0; l<ncols; l++) {
5943             *bufJ++ = cols[l];
5944           }
5945           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5946         }
5947       }
5948       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5949     }
5950 
5951     /* recvs and sends of j-array are completed */
5952     i = nrecvs;
5953     while (i--) {
5954       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5955     }
5956     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5957   } else if (scall == MAT_REUSE_MATRIX) {
5958     sstartsj = *startsj_s;
5959     rstartsj = *startsj_r;
5960     bufa     = *bufa_ptr;
5961     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5962     b_otha   = b_oth->a;
5963 #if defined(PETSC_HAVE_DEVICE)
5964     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5965 #endif
5966   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5967 
5968   /* a-array */
5969   /*---------*/
5970   /*  post receives of a-array */
5971   for (i=0; i<nrecvs; i++) {
5972     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5973     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5974   }
5975 
5976   /* pack the outgoing message a-array */
5977   if (nsends) k = sstarts[0];
5978   for (i=0; i<nsends; i++) {
5979     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5980     bufA  = bufa+sstartsj[i];
5981     for (j=0; j<nrows; j++) {
5982       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5983       for (ll=0; ll<sbs; ll++) {
5984         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5985         for (l=0; l<ncols; l++) {
5986           *bufA++ = vals[l];
5987         }
5988         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5989       }
5990     }
5991     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5992   }
5993   /* recvs and sends of a-array are completed */
5994   i = nrecvs;
5995   while (i--) {
5996     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5997   }
5998   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5999   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
6000 
6001   if (scall == MAT_INITIAL_MATRIX) {
6002     /* put together the new matrix */
6003     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
6004 
6005     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6006     /* Since these are PETSc arrays, change flags to free them as necessary. */
6007     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
6008     b_oth->free_a  = PETSC_TRUE;
6009     b_oth->free_ij = PETSC_TRUE;
6010     b_oth->nonew   = 0;
6011 
6012     ierr = PetscFree(bufj);CHKERRQ(ierr);
6013     if (!startsj_s || !bufa_ptr) {
6014       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
6015       ierr = PetscFree(bufa);CHKERRQ(ierr);
6016     } else {
6017       *startsj_s = sstartsj;
6018       *startsj_r = rstartsj;
6019       *bufa_ptr  = bufa;
6020     }
6021   }
6022 
6023   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
6024   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
6025   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
6026   PetscFunctionReturn(0);
6027 }
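
/* Illustrative usage sketch (not part of the library source; variable names are
   hypothetical): the three saved arrays let the routine skip the symbolic i/j
   exchange when only the values of B have changed:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       Both = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ... B is modified without changing its nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&Both);CHKERRQ(ierr);
*/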
6028 
6029 /*@C
6030   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
6031 
6032   Not Collective
6033 
6034   Input Parameter:
6035 . A - The matrix in mpiaij format
6036 
6037   Output Parameters:
6038 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
6039 . colmap - A map from global column index to local index into lvec
6040 - multScatter - A scatter from the argument of a matrix-vector product to lvec
6041 
6042   Level: developer
6043 
6044 @*/
6045 #if defined(PETSC_USE_CTABLE)
6046 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
6047 #else
6048 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
6049 #endif
6050 {
6051   Mat_MPIAIJ *a;
6052 
6053   PetscFunctionBegin;
6054   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
6055   PetscValidPointer(lvec, 2);
6056   PetscValidPointer(colmap, 3);
6057   PetscValidPointer(multScatter, 4);
6058   a = (Mat_MPIAIJ*) A->data;
6059   if (lvec) *lvec = a->lvec;
6060   if (colmap) *colmap = a->colmap;
6061   if (multScatter) *multScatter = a->Mvctx;
6062   PetscFunctionReturn(0);
6063 }
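
/* Illustrative usage sketch (not part of the library source; variable names are
   hypothetical): peek at the scatter machinery behind MatMult() for a MATMPIAIJ
   matrix; the returned objects are borrowed references and must not be destroyed
   by the caller:

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
*/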
6064 
6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
6068 #if defined(PETSC_HAVE_MKL_SPARSE)
6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
6070 #endif
6071 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
6073 #if defined(PETSC_HAVE_ELEMENTAL)
6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
6075 #endif
6076 #if defined(PETSC_HAVE_SCALAPACK)
6077 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
6078 #endif
6079 #if defined(PETSC_HAVE_HYPRE)
6080 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
6081 #endif
6082 #if defined(PETSC_HAVE_CUDA)
6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
6084 #endif
6085 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6086 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
6087 #endif
6088 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
6089 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
6090 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6091 
6092 /*
6093     Computes (B'*A')' since computing B*A directly is untenable
6094 
6095                n                       p                          p
6096         [             ]       [             ]         [                 ]
6097       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6098         [             ]       [             ]         [                 ]
6099 
6100 */
6101 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6102 {
6103   PetscErrorCode ierr;
6104   Mat            At,Bt,Ct;
6105 
6106   PetscFunctionBegin;
6107   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
6108   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
6109   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
6110   ierr = MatDestroy(&At);CHKERRQ(ierr);
6111   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
6112   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
6113   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
6114   PetscFunctionReturn(0);
6115 }
6116 
6117 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6118 {
6119   PetscErrorCode ierr;
6120   PetscBool      cisdense;
6121 
6122   PetscFunctionBegin;
6123   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
6124   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6125   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6126   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6127   if (!cisdense) {
6128     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6129   }
6130   ierr = MatSetUp(C);CHKERRQ(ierr);
6131 
6132   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6133   PetscFunctionReturn(0);
6134 }
6135 
6136 /* ----------------------------------------------------------------*/
6137 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6138 {
6139   Mat_Product *product = C->product;
6140   Mat         A = product->A,B=product->B;
6141 
6142   PetscFunctionBegin;
6143   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6144     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6145 
6146   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6147   C->ops->productsymbolic = MatProductSymbolic_AB;
6148   PetscFunctionReturn(0);
6149 }
6150 
6151 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6152 {
6153   PetscErrorCode ierr;
6154   Mat_Product    *product = C->product;
6155 
6156   PetscFunctionBegin;
6157   if (product->type == MATPRODUCT_AB) {
6158     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6159   }
6160   PetscFunctionReturn(0);
6161 }
6162 /* ----------------------------------------------------------------*/
6163 
6164 /*MC
6165    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6166 
6167    Options Database Keys:
6168 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6169 
6170    Level: beginner
6171 
6172    Notes:
6173     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6174     in this case the values associated with the rows and columns one passes in are set to zero
6175     in the matrix
6176 
6177     MatSetOption(mat,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6178     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6179 
6180 .seealso: MatCreateAIJ()
6181 M*/
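
/* Illustrative creation sketch (not part of the library source; the sizes and
   preallocation counts are placeholders): build an "mpiaij" matrix explicitly
   rather than through -mat_type mpiaij:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/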
6182 
6183 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6184 {
6185   Mat_MPIAIJ     *b;
6186   PetscErrorCode ierr;
6187   PetscMPIInt    size;
6188 
6189   PetscFunctionBegin;
6190   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6191 
6192   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6193   B->data       = (void*)b;
6194   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6195   B->assembled  = PETSC_FALSE;
6196   B->insertmode = NOT_SET_VALUES;
6197   b->size       = size;
6198 
6199   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6200 
6201   /* build cache for off array entries formed */
6202   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6203 
6204   b->donotstash  = PETSC_FALSE;
6205   b->colmap      = NULL;
6206   b->garray      = NULL;
6207   b->roworiented = PETSC_TRUE;
6208 
6209   /* stuff used for matrix vector multiply */
6210   b->lvec  = NULL;
6211   b->Mvctx = NULL;
6212 
6213   /* stuff for MatGetRow() */
6214   b->rowindices   = NULL;
6215   b->rowvalues    = NULL;
6216   b->getrowactive = PETSC_FALSE;
6217 
6218   /* flexible pointer used in CUSPARSE classes */
6219   b->spptr = NULL;
6220 
6221   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6222   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6223   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6224   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6225   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6226   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6227   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6228   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6229   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6230   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6231 #if defined(PETSC_HAVE_CUDA)
6232   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6233 #endif
6234 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6236 #endif
6237 #if defined(PETSC_HAVE_MKL_SPARSE)
6238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6239 #endif
6240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6241   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6242   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6243 #if defined(PETSC_HAVE_ELEMENTAL)
6244   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6245 #endif
6246 #if defined(PETSC_HAVE_SCALAPACK)
6247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6248 #endif
6249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6251 #if defined(PETSC_HAVE_HYPRE)
6252   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6254 #endif
6255   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6256   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6257   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6258   PetscFunctionReturn(0);
6259 }
6260 
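/*
   For reference, a typical creation sequence that reaches the constructor above; the global sizes
   and preallocation counts are placeholder values, and it is MatSetType() that dispatches to
   MatCreate_MPIAIJ():

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/
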
6261 /*@C
6262      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6263          and "off-diagonal" parts of the matrix in CSR format.
6264 
6265    Collective
6266 
6267    Input Parameters:
6268 +  comm - MPI communicator
6269 .  m - number of local rows (cannot be PETSC_DECIDE)
6270 .  n - number of local columns; this value should be the same as the local size used in creating the
6271        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6272        calculated if N is given). For square matrices n is almost always m.
6273 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6274 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6275 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0 and i[row] = i[row-1] + number of elements in row row-1 of the "diagonal" portion
6276 .   j - column indices
6277 .   a - matrix values
6278 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0 and oi[row] = oi[row-1] + number of elements in row row-1 of the "off-diagonal" portion
6279 .   oj - column indices
6280 -   oa - matrix values
6281 
6282    Output Parameter:
6283 .   mat - the matrix
6284 
6285    Level: advanced
6286 
6287    Notes:
6288        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6289        must not free these arrays until after the matrix has been destroyed.
6290 
6291        The i and j indices are 0 based
6292        The i, j, oi, and oj indices are 0-based
6293        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6294 
6295        This sets local rows and cannot be used to set off-processor values.
6296 
6297        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6298        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6299        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6300        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6301        keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6302        communication if it is known that only local entries will be set.
6303 
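   As an illustration, consider a 4 x 4 matrix over two ranks where each rank owns two rows and two
   columns (all values below are made up for the sketch). Rank 0 holds the "diagonal" entries
   (0,0)=1, (0,1)=2, (1,1)=3 and the "off-diagonal" entry (0,2)=4, so it would pass
.vb
   PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1};  /* local column indices of the diagonal block */
   PetscScalar a[]  = {1.0,2.0,3.0};
   PetscInt    oi[] = {0,1,1}, oj[] = {2};      /* global column indices of the off-diagonal block */
   PetscScalar oa[] = {4.0};
   Mat         A;
   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   while rank 1 passes the analogous arrays for its own two rows; the arrays must remain valid
   until A has been destroyed.
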
6304 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6305           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6306 @*/
6307 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6308 {
6309   PetscErrorCode ierr;
6310   Mat_MPIAIJ     *maij;
6311 
6312   PetscFunctionBegin;
6313   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
6314   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6315   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6316   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6317   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6318   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6319   maij = (Mat_MPIAIJ*) (*mat)->data;
6320 
6321   (*mat)->preallocated = PETSC_TRUE;
6322 
6323   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6324   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6325 
6326   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6327   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6328 
6329   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6330   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6331   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6332   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6333 
6334   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6335   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6336   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6337   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6338   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6339   PetscFunctionReturn(0);
6340 }
6341 
6342 /*
6343     Special version of MatSetValues() for MATMPIAIJ matrices, for direct calls from Fortran
6344 */
6345 #include <petsc/private/fortranimpl.h>
6346 
6347 /* Redefine these macros so they can be used in a void function: errors abort via CHKERRABORT() since no PetscErrorCode can be returned */
6348 #undef CHKERRQ
6349 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6350 #undef SETERRQ2
6351 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6352 #undef SETERRQ3
6353 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6354 #undef SETERRQ
6355 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6356 
6357 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6358 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6359 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6360 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6361 #else
6362 #endif
6363 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6364 {
6365   Mat            mat  = *mmat;
6366   PetscInt       m    = *mm, n = *mn;
6367   InsertMode     addv = *maddv;
6368   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6369   PetscScalar    value;
6370   PetscErrorCode ierr;
6371 
6372   MatCheckPreallocated(mat,1);
6373   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6374   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6375   {
6376     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6377     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6378     PetscBool roworiented = aij->roworiented;
6379 
6380     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
6381     Mat        A                    = aij->A;
6382     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6383     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6384     MatScalar  *aa                  = a->a;
6385     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6386     Mat        B                    = aij->B;
6387     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6388     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6389     MatScalar  *ba                  = b->a;
6390     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6391      * cannot use "#if defined" inside a macro. */
6392     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6393 
6394     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6395     PetscInt  nonew = a->nonew;
6396     MatScalar *ap1,*ap2;
6397 
6398     PetscFunctionBegin;
6399     for (i=0; i<m; i++) {
6400       if (im[i] < 0) continue;
6401       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6402       if (im[i] >= rstart && im[i] < rend) {
6403         row      = im[i] - rstart;
6404         lastcol1 = -1;
6405         rp1      = aj + ai[row];
6406         ap1      = aa + ai[row];
6407         rmax1    = aimax[row];
6408         nrow1    = ailen[row];
6409         low1     = 0;
6410         high1    = nrow1;
6411         lastcol2 = -1;
6412         rp2      = bj + bi[row];
6413         ap2      = ba + bi[row];
6414         rmax2    = bimax[row];
6415         nrow2    = bilen[row];
6416         low2     = 0;
6417         high2    = nrow2;
6418 
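        /* for each column: entries whose global column lies in [cstart,cend) go into the diagonal
           block A, all others into the off-diagonal block B; the *_Private() macros below perform
           a sorted insertion using a binary search bounded by low/high */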
6419         for (j=0; j<n; j++) {
6420           if (roworiented) value = v[i*n+j];
6421           else value = v[i+j*m];
6422           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6423           if (in[j] >= cstart && in[j] < cend) {
6424             col = in[j] - cstart;
6425             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6426 #if defined(PETSC_HAVE_DEVICE)
6427             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6428 #endif
6429           } else if (in[j] < 0) continue;
6430           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6431             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6432             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6433           } else {
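            /* in[j] falls outside this rank's diagonal column block, so the entry belongs to the
               off-diagonal matrix B: if the matrix was assembled before, the global column index
               is translated to B's compacted local numbering through colmap, and a new nonzero
               location forces B back to global (disassembled) indexing via MatDisAssemble_MPIAIJ() */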
6434             if (mat->was_assembled) {
6435               if (!aij->colmap) {
6436                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6437               }
6438 #if defined(PETSC_USE_CTABLE)
6439               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6440               col--;
6441 #else
6442               col = aij->colmap[in[j]] - 1;
6443 #endif
6444               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6445                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6446                 col  =  in[j];
6447                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6448                 B        = aij->B;
6449                 b        = (Mat_SeqAIJ*)B->data;
6450                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6451                 rp2      = bj + bi[row];
6452                 ap2      = ba + bi[row];
6453                 rmax2    = bimax[row];
6454                 nrow2    = bilen[row];
6455                 low2     = 0;
6456                 high2    = nrow2;
6457                 bm       = aij->B->rmap->n;
6458                 ba       = b->a;
6459                 inserted = PETSC_FALSE;
6460               }
6461             } else col = in[j];
6462             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6463 #if defined(PETSC_HAVE_DEVICE)
6464             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6465 #endif
6466           }
6467         }
6468       } else if (!aij->donotstash) {
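        /* the row is owned by another rank: stash the values locally; they will be sent to the
           owning rank when MatAssemblyBegin()/MatAssemblyEnd() are called */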
6469         if (roworiented) {
6470           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6471         } else {
6472           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6473         }
6474       }
6475     }
6476   }
6477   PetscFunctionReturnVoid();
6478 }
6479