xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 80dbbc5daea1d94234d1c522b1926f2e2aee80ec)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15    for communicators controlling multiple processes.  It is recommended that you call both of
16    the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
23     automatically switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
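
/*
   A minimal usage sketch of the recommendation above (comm, m, and n are hypothetical
   variables; error checking with CHKERRQ() is omitted and the preallocation counts are
   illustrative only):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);         // used when comm has a single process
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);  // used when comm has multiple processes
.ve
*/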
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
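
/*
   A sketch of selecting this format at runtime (assumes the application calls
   MatSetFromOptions() on the matrix and is run with -mat_type aijcrl):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetFromOptions(A);   // picks up -mat_type aijcrl from the options database
.ve
*/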
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
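
/*
   A minimal usage sketch for the routine above (a hypothetical caller; gmat is a square
   MATSEQAIJ held on rank 0 and m is this rank's desired local row count):
.vb
   Mat dmat;
   MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);  // create and fill dmat
   // ... change the numerical values of gmat on rank 0, keeping the same nonzero pattern ...
   MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);    // move over the new values only
.ve
*/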
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422   number to the local number in the off-diagonal part of the local
423   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
424   a slightly higher hash-table lookup cost; without it, it is not scalable (each
425   process has an order-N integer array) but is fast to access.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
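
/*
   A sketch of how the colmap built above is consulted elsewhere in this file (it mirrors
   the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(); gcol is a global column
   index):
.vb
 #if defined(PETSC_USE_CTABLE)
   PetscTableFind(aij->colmap,gcol+1,&col);   // keys/values are stored shifted by one so 0 can mean "absent"
   col--;                                     // col is now the local index, or -1 if absent
 #else
   col = aij->colmap[gcol] - 1;               // array entries hold local index + 1; 0 means "absent"
 #endif
.ve
*/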
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
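
/*
   Note on the two macros above: both use the same insertion strategy on a row with sorted
   column indices: a short bisection that stops once the search window has at most 5 entries,
   a linear scan of that window, and, when the column is not yet present and new nonzeros are
   allowed, a PetscArraymove() shift to open a slot. The lastcol/low/high state persists
   between invocations so that monotonically increasing column insertions resume the search
   rather than restarting from the beginning of the row.
*/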
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
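
/*
   Layout expected for v[] by the routine above (spelled out from the three copies it makes):
   the caller supplies the entire local row in increasing global column order, i.e.

     v = [ B entries with global column < diag | all A (diagonal block) entries | remaining B entries ]

   where diag is the first global row (= column, since the matrix must be square) owned by
   this process.
*/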
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
584 #endif
585     if (im[i] >= rstart && im[i] < rend) {
586       row      = im[i] - rstart;
587       lastcol1 = -1;
588       rp1      = aj + ai[row];
589       ap1      = aa + ai[row];
590       rmax1    = aimax[row];
591       nrow1    = ailen[row];
592       low1     = 0;
593       high1    = nrow1;
594       lastcol2 = -1;
595       rp2      = bj + bi[row];
596       ap2      = ba + bi[row];
597       rmax2    = bimax[row];
598       nrow2    = bilen[row];
599       low2     = 0;
600       high2    = nrow2;
601 
602       for (j=0; j<n; j++) {
603         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
604         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
605         if (in[j] >= cstart && in[j] < cend) {
606           col   = in[j] - cstart;
607           nonew = a->nonew;
608           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
610           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
611 #endif
612         } else if (in[j] < 0) continue;
613 #if defined(PETSC_USE_DEBUG)
614         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
615 #endif
616         else {
617           if (mat->was_assembled) {
618             if (!aij->colmap) {
619               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
620             }
621 #if defined(PETSC_USE_CTABLE)
622             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
623             col--;
624 #else
625             col = aij->colmap[in[j]] - 1;
626 #endif
627             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
628               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
629               col  =  in[j];
630               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
631               B        = aij->B;
632               b        = (Mat_SeqAIJ*)B->data;
633               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
634               rp2      = bj + bi[row];
635               ap2      = ba + bi[row];
636               rmax2    = bimax[row];
637               nrow2    = bilen[row];
638               low2     = 0;
639               high2    = nrow2;
640               bm       = aij->B->rmap->n;
641               ba       = b->a;
642               inserted = PETSC_FALSE;
643             } else if (col < 0) {
644               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
645                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
646               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
647             }
648           } else col = in[j];
649           nonew = b->nonew;
650           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
652           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
653 #endif
654         }
655       }
656     } else {
657       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
658       if (!aij->donotstash) {
659         mat->assembled = PETSC_FALSE;
660         if (roworiented) {
661           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
662         } else {
663           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
664         }
665       }
666     }
667   }
668   PetscFunctionReturn(0);
669 }
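
/*
   A typical call pattern handled by the routine above (a sketch; grow, gcol0, and gcol1 are
   placeholder global indices, and rows not owned by this process are stashed and communicated
   at assembly time):
.vb
   PetscInt    row = grow, cols[2] = {gcol0,gcol1};
   PetscScalar vals[2] = {1.0,-1.0};
   MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);
   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
.ve
*/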
670 
671 /*
672     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
673     The values in mat_i have to be sorted (they are the CSR row offsets) and the values in mat_j have to be sorted within each row.
674     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
675 */
676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
677 {
678   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
679   Mat            A           = aij->A; /* diagonal part of the matrix */
680   Mat            B           = aij->B; /* offdiagonal part of the matrix */
681   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
682   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
683   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
684   PetscInt       *ailen      = a->ilen,*aj = a->j;
685   PetscInt       *bilen      = b->ilen,*bj = b->j;
686   PetscInt       am          = aij->A->rmap->n,j;
687   PetscInt       diag_so_far = 0,dnz;
688   PetscInt       offd_so_far = 0,onz;
689 
690   PetscFunctionBegin;
691   /* Iterate over all rows of the matrix */
692   for (j=0; j<am; j++) {
693     dnz = onz = 0;
694     /*  Iterate over all non-zero columns of the current row */
695     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
696       /* If column is in the diagonal */
697       if (mat_j[col] >= cstart && mat_j[col] < cend) {
698         aj[diag_so_far++] = mat_j[col] - cstart;
699         dnz++;
700       } else { /* off-diagonal entries */
701         bj[offd_so_far++] = mat_j[col];
702         onz++;
703       }
704     }
705     ailen[j] = dnz;
706     bilen[j] = onz;
707   }
708   PetscFunctionReturn(0);
709 }
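
/*
   A worked example of the split performed above (illustrative numbers): with cstart = 2 and
   cend = 5, a row whose sorted global columns are {0,3,4,6} contributes local columns
   {1,2} (= {3,4} - cstart) to aj with dnz = 2, while bj receives the still-global {0,6}
   with onz = 2. The companion routine below performs the same split and additionally copies
   the numerical values.
*/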
710 
711 /*
712     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
713     The values in mat_i have to be sorted (they are the CSR row offsets) and the values in mat_j have to be sorted within each row.
714     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
715     Also, mat->was_assembled has to be false; otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart
716     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
717 */
718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
719 {
720   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
721   Mat            A      = aij->A; /* diagonal part of the matrix */
722   Mat            B      = aij->B; /* offdiagonal part of the matrix */
723   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
724   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
725   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
727   PetscInt       *ailen = a->ilen,*aj = a->j;
728   PetscInt       *bilen = b->ilen,*bj = b->j;
729   PetscInt       am     = aij->A->rmap->n,j;
730   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
731   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
732   PetscScalar    *aa = a->a,*ba = b->a;
733 
734   PetscFunctionBegin;
735   /* Iterate over all rows of the matrix */
736   for (j=0; j<am; j++) {
737     dnz_row = onz_row = 0;
738     rowstart_offd = full_offd_i[j];
739     rowstart_diag = full_diag_i[j];
740     /*  Iterate over all non-zero columns of the current row */
741     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
742       /* If column is in the diagonal */
743       if (mat_j[col] >= cstart && mat_j[col] < cend) {
744         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
745         aa[rowstart_diag+dnz_row] = mat_a[col];
746         dnz_row++;
747       } else { /* off-diagonal entries */
748         bj[rowstart_offd+onz_row] = mat_j[col];
749         ba[rowstart_offd+onz_row] = mat_a[col];
750         onz_row++;
751       }
752     }
753     ailen[j] = dnz_row;
754     bilen[j] = onz_row;
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
760 {
761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
762   PetscErrorCode ierr;
763   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
764   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
765 
766   PetscFunctionBegin;
767   for (i=0; i<m; i++) {
768     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
769     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
770     if (idxm[i] >= rstart && idxm[i] < rend) {
771       row = idxm[i] - rstart;
772       for (j=0; j<n; j++) {
773         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
774         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
775         if (idxn[j] >= cstart && idxn[j] < cend) {
776           col  = idxn[j] - cstart;
777           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
778         } else {
779           if (!aij->colmap) {
780             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
781           }
782 #if defined(PETSC_USE_CTABLE)
783           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
784           col--;
785 #else
786           col = aij->colmap[idxn[j]] - 1;
787 #endif
788           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
789           else {
790             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
800 
801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   PetscErrorCode ierr;
805   PetscInt       nstash,reallocs;
806 
807   PetscFunctionBegin;
808   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
812   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
819   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
820   PetscErrorCode ierr;
821   PetscMPIInt    n;
822   PetscInt       i,j,rstart,ncols,flg;
823   PetscInt       *row,*col;
824   PetscBool      other_disassembled;
825   PetscScalar    *val;
826 
827   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
828 
829   PetscFunctionBegin;
830   if (!aij->donotstash && !mat->nooffprocentries) {
831     while (1) {
832       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
833       if (!flg) break;
834 
835       for (i=0; i<n; ) {
836         /* Now identify the consecutive vals belonging to the same row */
837         for (j=i,rstart=row[j]; j<n; j++) {
838           if (row[j] != rstart) break;
839         }
840         if (j < n) ncols = j-i;
841         else       ncols = n-i;
842         /* Now assemble all these values with a single function call */
843         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
844 
845         i = j;
846       }
847     }
848     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
849   }
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
852   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
853   if (mat->boundtocpu) {
854     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
855     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
856   }
857 #endif
858   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
859   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
860 
861   /* determine if any processor has disassembled; if so we must
862      also disassemble ourselves, so that we may reassemble. */
863   /*
864      if nonzero structure of submatrix B cannot change then we know that
865      no processor disassembled thus we can skip this stuff
866   */
867   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
868     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
869     if (mat->was_assembled && !other_disassembled) {
870 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
871       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
872 #endif
873       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
874     }
875   }
876   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
877     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
878   }
879   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
880 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
881   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
882 #endif
883   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
884   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
885 
886   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
887 
888   aij->rowvalues = 0;
889 
890   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
891   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
892 
893   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
894   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
895     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
896     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
897   }
898 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
899   mat->offloadmask = PETSC_OFFLOAD_BOTH;
900 #endif
901   PetscFunctionReturn(0);
902 }
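
/*
   The routine above is reached through the standard assembly sequence (a sketch):
.vb
   MatSetValues(mat,...);                     // possibly including off-process rows
   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);  // starts the stash scatter (see above)
   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);    // drains the stash, assembles A and B, sets up the multiply
.ve
*/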
903 
904 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
905 {
906   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
907   PetscErrorCode ierr;
908 
909   PetscFunctionBegin;
910   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
911   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
915 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
916 {
917   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
918   PetscObjectState sA, sB;
919   PetscInt        *lrows;
920   PetscInt         r, len;
921   PetscBool        cong, lch, gch;
922   PetscErrorCode   ierr;
923 
924   PetscFunctionBegin;
925   /* get locally owned rows */
926   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
927   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
928   /* fix right hand side if needed */
929   if (x && b) {
930     const PetscScalar *xx;
931     PetscScalar       *bb;
932 
933     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
934     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
935     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
936     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
937     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
938     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
939   }
940 
941   sA = mat->A->nonzerostate;
942   sB = mat->B->nonzerostate;
943 
944   if (diag != 0.0 && cong) {
945     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
946     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
947   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
948     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
949     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
950     PetscInt   nnwA, nnwB;
951     PetscBool  nnzA, nnzB;
952 
953     nnwA = aijA->nonew;
954     nnwB = aijB->nonew;
955     nnzA = aijA->keepnonzeropattern;
956     nnzB = aijB->keepnonzeropattern;
957     if (!nnzA) {
958       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
959       aijA->nonew = 0;
960     }
961     if (!nnzB) {
962       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
963       aijB->nonew = 0;
964     }
965     /* Must zero here before the next loop */
966     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
967     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
968     for (r = 0; r < len; ++r) {
969       const PetscInt row = lrows[r] + A->rmap->rstart;
970       if (row >= A->cmap->N) continue;
971       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
972     }
973     aijA->nonew = nnwA;
974     aijB->nonew = nnwB;
975   } else {
976     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
977     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(lrows);CHKERRQ(ierr);
980   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982 
983   /* reduce nonzerostate */
984   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
985   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
986   if (gch) A->nonzerostate++;
987   PetscFunctionReturn(0);
988 }
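
/*
   A common use of the routine above is imposing Dirichlet boundary conditions (a sketch;
   rows holds nbc global boundary row indices, x the prescribed values, b the right-hand side):
.vb
   MatZeroRows(A,nbc,rows,1.0,x,b);   // zero the rows, put 1.0 on the diagonal, set b = x there
.ve
*/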
989 
990 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
991 {
992   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
993   PetscErrorCode    ierr;
994   PetscMPIInt       n = A->rmap->n;
995   PetscInt          i,j,r,m,len = 0;
996   PetscInt          *lrows,*owners = A->rmap->range;
997   PetscMPIInt       p = 0;
998   PetscSFNode       *rrows;
999   PetscSF           sf;
1000   const PetscScalar *xx;
1001   PetscScalar       *bb,*mask;
1002   Vec               xmask,lmask;
1003   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
1004   const PetscInt    *aj, *ii,*ridx;
1005   PetscScalar       *aa;
1006 
1007   PetscFunctionBegin;
1008   /* Create SF where leaves are input rows and roots are owned rows */
1009   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1010   for (r = 0; r < n; ++r) lrows[r] = -1;
1011   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1012   for (r = 0; r < N; ++r) {
1013     const PetscInt idx   = rows[r];
1014     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1015     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1016       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1017     }
1018     rrows[r].rank  = p;
1019     rrows[r].index = rows[r] - owners[p];
1020   }
1021   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1022   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1023   /* Collect flags for rows to be zeroed */
1024   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1025   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1026   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1027   /* Compress and put in row numbers */
1028   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1029   /* zero diagonal part of matrix */
1030   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1031   /* handle off diagonal part of matrix */
1032   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1033   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1034   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1035   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1036   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1037   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1039   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1040   if (x && b) { /* this code is buggy when the row and column layout don't match */
1041     PetscBool cong;
1042 
1043     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1044     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1045     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1047     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1048     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1049   }
1050   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1051   /* remove zeroed rows of off diagonal matrix */
1052   ii = aij->i;
1053   for (i=0; i<len; i++) {
1054     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1055   }
1056   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1057   if (aij->compressedrow.use) {
1058     m    = aij->compressedrow.nrows;
1059     ii   = aij->compressedrow.i;
1060     ridx = aij->compressedrow.rindex;
1061     for (i=0; i<m; i++) {
1062       n  = ii[i+1] - ii[i];
1063       aj = aij->j + ii[i];
1064       aa = aij->a + ii[i];
1065 
1066       for (j=0; j<n; j++) {
1067         if (PetscAbsScalar(mask[*aj])) {
1068           if (b) bb[*ridx] -= *aa*xx[*aj];
1069           *aa = 0.0;
1070         }
1071         aa++;
1072         aj++;
1073       }
1074       ridx++;
1075     }
1076   } else { /* do not use compressed row format */
1077     m = l->B->rmap->n;
1078     for (i=0; i<m; i++) {
1079       n  = ii[i+1] - ii[i];
1080       aj = aij->j + ii[i];
1081       aa = aij->a + ii[i];
1082       for (j=0; j<n; j++) {
1083         if (PetscAbsScalar(mask[*aj])) {
1084           if (b) bb[i] -= *aa*xx[*aj];
1085           *aa = 0.0;
1086         }
1087         aa++;
1088         aj++;
1089       }
1090     }
1091   }
1092   if (x && b) {
1093     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1094     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1095   }
1096   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1097   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1098   ierr = PetscFree(lrows);CHKERRQ(ierr);
1099 
1100   /* only change matrix nonzero state if pattern was allowed to be changed */
1101   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1102     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1103     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1104   }
1105   PetscFunctionReturn(0);
1106 }
1107 
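/*
   The multiply routines below overlap communication with computation: the scatter of the
   needed off-process entries of the input vector into a->lvec is started, the local product
   with the diagonal block a->A is computed while messages are in flight, and once the
   scatter completes the contribution of the off-diagonal block a->B is added from a->lvec.
*/
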
1108 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112   PetscInt       nt;
1113   VecScatter     Mvctx = a->Mvctx;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1117   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1118 
1119   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1121   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1122   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1127 {
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1137 {
1138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1139   PetscErrorCode ierr;
1140   VecScatter     Mvctx = a->Mvctx;
1141 
1142   PetscFunctionBegin;
1143   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1144   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1146   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1147   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1152 {
1153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157   /* do nondiagonal part */
1158   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1159   /* do local part */
1160   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1161   /* add partial results together */
1162   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1163   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1168 {
1169   MPI_Comm       comm;
1170   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1171   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1172   IS             Me,Notme;
1173   PetscErrorCode ierr;
1174   PetscInt       M,N,first,last,*notme,i;
1175   PetscBool      lf;
1176   PetscMPIInt    size;
1177 
1178   PetscFunctionBegin;
1179   /* Easy test: symmetric diagonal block */
1180   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1181   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1182   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1183   if (!*f) PetscFunctionReturn(0);
1184   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1185   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1186   if (size == 1) PetscFunctionReturn(0);
1187 
1188   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1189   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1190   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1191   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1192   for (i=0; i<first; i++) notme[i] = i;
1193   for (i=last; i<M; i++) notme[i-last+first] = i;
1194   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1195   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1196   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1197   Aoff = Aoffs[0];
1198   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1199   Boff = Boffs[0];
1200   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1201   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1202   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1203   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1204   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1205   ierr = PetscFree(notme);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1210 {
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   /* do nondiagonal part */
1225   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1226   /* do local part */
1227   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1228   /* add partial results together */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 /*
1235   This only works correctly for square matrices where the subblock A->A is the
1236    diagonal block
1237 */
1238 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1239 {
1240   PetscErrorCode ierr;
1241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1242 
1243   PetscFunctionBegin;
1244   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1245   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1246   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1251 {
1252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1257   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1262 {
1263   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267 #if defined(PETSC_USE_LOG)
1268   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1269 #endif
1270   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1271   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1272   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1273   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1274 #if defined(PETSC_USE_CTABLE)
1275   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1276 #else
1277   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1278 #endif
1279   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1281   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1282   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1283   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1284   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1285   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1286 
1287   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1297 #if defined(PETSC_HAVE_ELEMENTAL)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
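
/*
   For reference, the byte stream produced above (a summary of the writes in this
   routine, not an independent format specification):

     PetscInt    header[4]    = {MAT_FILE_CLASSID, M, N, global nz}
     PetscInt    rowlens[M]     nonzeros per global row
     PetscInt    colidxs[nz]    global column indices, ascending within each row
     PetscScalar matvals[nz]    values in the same order as colidxs

   Interleaving the A (diagonal) and B (off-diagonal) parts in the two loops above
   is what restores the ascending column order within each row.
*/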
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented code uses MatCreateSubMatrices instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Everyone has to call to draw the matrix since the graphics waits are
1495        synchronized across all processors that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = 0;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || (~flag & SOR_ZERO_INITIAL_GUESS) || (flag & SOR_EISENSTAT)) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
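
/*
   The SOR_LOCAL_* branches above all follow the same pattern (a sketch):

     for each outer iteration:
       x_ghost <- scatter(xx)                    forward scatter of the current iterate
       bb1     <- bb - A_o * x_ghost             fold the off-process coupling into the rhs
       xx      <- SOR(A_d, bb1, omega, lits)     purely local sweep on the diagonal block

   i.e., process-block SOR: the off-diagonal part A_o = mat->B is treated explicitly
   while the diagonal block A_d = mat->A is relaxed locally.
*/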
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than m, the length of the repurposed buffers, so insert in batches */
1711       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
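
/*
   An outline of the permutation algorithm above:
     1. Invert the row and column permutations with PetscSF reductions so each
        process learns the destination index of every row/column it owns.
     2. Broadcast the destinations of the ghost columns (gcols -> gcdest).
     3. Count the diagonal/off-diagonal nonzeros each permuted row will have and
        move the counts (over the same star forest) to the processes that own the
        destination rows, for preallocation.
     4. Insert the permuted entries with MatSetValues() and assemble; the batching
        by m keeps the repurposed index buffers within bounds.
*/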
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
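
/*
   Minimal usage sketch for the public interface (error handling abbreviated):

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);

   ghosts[] holds the global column indices of the off-diagonal part; the array is
   owned by the matrix (it is aij->garray) and must not be freed by the caller.
*/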
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_IGNORE_ZERO_ENTRIES:
1805     MatCheckPreallocated(A,1);
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_USE_INODES:
1810     if (PetscUnlikely(!(A)->preallocated)) {
1811       a->inode_setoption = PETSC_TRUE; /* option will be set in MatMPIAIJSetPreallocation_MPIAIJ() */
1812       a->inode_use       = flg;
1813     } else {
1814       a->inode_setoption = PETSC_FALSE;
1815       ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1816       ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1817     }
1818     break;
1819   case MAT_ROW_ORIENTED:
1820     MatCheckPreallocated(A,1);
1821     a->roworiented = flg;
1822 
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1825     break;
1826   case MAT_NEW_DIAGONALS:
1827   case MAT_SORTED_FULL:
1828     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1829     break;
1830   case MAT_IGNORE_OFF_PROC_ENTRIES:
1831     a->donotstash = flg;
1832     break;
1833   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1834   case MAT_SPD:
1835   case MAT_SYMMETRIC:
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837   case MAT_HERMITIAN:
1838   case MAT_SYMMETRY_ETERNAL:
1839     break;
1840   case MAT_SUBMAT_SINGLEIS:
1841     A->submat_singleis = flg;
1842     break;
1843   case MAT_STRUCTURE_ONLY:
1844     /* The option is handled directly by MatSetOption() */
1845     break;
1846   default:
1847     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) max = tmp;
1874     }
1875     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = 0; pvB = 0;}
1883   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1884   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Sort by increasing column numbers, assuming A and B already sorted */
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = 0;
1921       if (v)   *v   = 0;
1922     }
1923   }
1924   *nz  = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
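
/*
   The merge above relies on garray[] being sorted: the off-diagonal entries of a
   row split into those with global column < cstart (copied first), then the
   diagonal-block entries (local column + cstart), then the remaining off-diagonal
   entries whose global columns lie beyond the diagonal block, giving one row
   sorted by increasing global column.
*/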
1929 
1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933 
1934   PetscFunctionBegin;
1935   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1936   aij->getrowactive = PETSC_FALSE;
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1941 {
1942   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1943   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1944   PetscErrorCode ierr;
1945   PetscInt       i,j,cstart = mat->cmap->rstart;
1946   PetscReal      sum = 0.0;
1947   MatScalar      *v;
1948 
1949   PetscFunctionBegin;
1950   if (aij->size == 1) {
1951     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1952   } else {
1953     if (type == NORM_FROBENIUS) {
1954       v = amat->a;
1955       for (i=0; i<amat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       v = bmat->a;
1959       for (i=0; i<bmat->nz; i++) {
1960         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1961       }
1962       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1963       *norm = PetscSqrtReal(*norm);
1964       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1965     } else if (type == NORM_1) { /* max column norm */
1966       PetscReal *tmp,*tmp2;
1967       PetscInt  *jj,*garray = aij->garray;
1968       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1969       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1970       *norm = 0.0;
1971       v     = amat->a; jj = amat->j;
1972       for (j=0; j<amat->nz; j++) {
1973         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1974       }
1975       v = bmat->a; jj = bmat->j;
1976       for (j=0; j<bmat->nz; j++) {
1977         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1978       }
1979       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1980       for (j=0; j<mat->cmap->N; j++) {
1981         if (tmp2[j] > *norm) *norm = tmp2[j];
1982       }
1983       ierr = PetscFree(tmp);CHKERRQ(ierr);
1984       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1985       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1986     } else if (type == NORM_INFINITY) { /* max row norm */
1987       PetscReal ntemp = 0.0;
1988       for (j=0; j<aij->A->rmap->n; j++) {
1989         v   = amat->a + amat->i[j];
1990         sum = 0.0;
1991         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         v = bmat->a + bmat->i[j];
1995         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1996           sum += PetscAbsScalar(*v); v++;
1997         }
1998         if (sum > ntemp) ntemp = sum;
1999       }
2000       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2001       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2002     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2003   }
2004   PetscFunctionReturn(0);
2005 }
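
/*
   Summary of the reductions above:
     NORM_FROBENIUS : sqrt( allreduce-sum of sum_ij |a_ij|^2 )
     NORM_1         : max over global columns of the allreduced column sums
                      (note the O(cmap->N) temporaries per process)
     NORM_INFINITY  : allreduce-max of the local row sums
*/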
2006 
2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008 {
2009   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2010   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2011   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2012   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2013   PetscErrorCode  ierr;
2014   Mat             B,A_diag,*B_diag;
2015   const MatScalar *array;
2016 
2017   PetscFunctionBegin;
2018   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2019   ai = Aloc->i; aj = Aloc->j;
2020   bi = Bloc->i; bj = Bloc->j;
2021   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2022     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2023     PetscSFNode          *oloc;
2024     PETSC_UNUSED PetscSF sf;
2025 
2026     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2027     /* compute d_nnz for preallocation */
2028     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2029     for (i=0; i<ai[ma]; i++) {
2030       d_nnz[aj[i]]++;
2031     }
2032     /* compute local off-diagonal contributions */
2033     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2034     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2035     /* map those to global */
2036     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2037     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2038     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2039     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2040     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2041     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2042     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2043 
2044     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2045     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2046     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2047     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2048     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2049     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2050   } else {
2051     B    = *matout;
2052     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2053   }
2054 
2055   b           = (Mat_MPIAIJ*)B->data;
2056   A_diag      = a->A;
2057   B_diag      = &b->A;
2058   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2059   A_diag_ncol = A_diag->cmap->N;
2060   B_diag_ilen = sub_B_diag->ilen;
2061   B_diag_i    = sub_B_diag->i;
2062 
2063   /* Set ilen for diagonal of B */
2064   for (i=0; i<A_diag_ncol; i++) {
2065     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2066   }
2067 
2068   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2069   very quickly (i.e., without calling MatSetValues()), because all writes are local. */
2070   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2071 
2072   /* copy over the B part */
2073   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2074   array = Bloc->a;
2075   row   = A->rmap->rstart;
2076   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2077   cols_tmp = cols;
2078   for (i=0; i<mb; i++) {
2079     ncol = bi[i+1]-bi[i];
2080     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2081     row++;
2082     array += ncol; cols_tmp += ncol;
2083   }
2084   ierr = PetscFree(cols);CHKERRQ(ierr);
2085 
2086   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2089     *matout = B;
2090   } else {
2091     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2092   }
2093   PetscFunctionReturn(0);
2094 }
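
/*
   An outline of the transpose above: the diagonal block is transposed purely
   locally (MatTranspose() on a->A with MAT_REUSE_MATRIX), while each row of the
   off-diagonal block is inserted as a column of B through MatSetValues() with
   global indices, which routes those entries to their owning processes during
   assembly.
*/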
2095 
2096 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2097 {
2098   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2099   Mat            a    = aij->A,b = aij->B;
2100   PetscErrorCode ierr;
2101   PetscInt       s1,s2,s3;
2102 
2103   PetscFunctionBegin;
2104   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2105   if (rr) {
2106     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2107     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2108     /* Overlap communication with computation. */
2109     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2110   }
2111   if (ll) {
2112     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2113     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2114     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2115   }
2116   /* scale the diagonal block */
2117   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2118 
2119   if (rr) {
2120     /* Do a scatter end and then right scale the off-diagonal block */
2121     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2122     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2123   }
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2128 {
2129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2138 {
2139   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2140   Mat            a,b,c,d;
2141   PetscBool      flg;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   a = matA->A; b = matA->B;
2146   c = matB->A; d = matB->B;
2147 
2148   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2149   if (flg) {
2150     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2151   }
2152   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2157 {
2158   PetscErrorCode ierr;
2159   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2160   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2161 
2162   PetscFunctionBegin;
2163   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2164   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2165     /* because of the column compression in the off-processor part of the matrix a->B,
2166        the number of columns in a->B and b->B may be different, hence we cannot call
2167        the MatCopy() directly on the two parts. If need be, we can provide a more
2168        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2169        then copying the submatrices */
2170     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2171   } else {
2172     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2173     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2174   }
2175   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2180 {
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 /*
2189    Computes the number of nonzeros per row needed for preallocation when X and Y
2190    have different nonzero structure.
2191 */
2192 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2193 {
2194   PetscInt       i,j,k,nzx,nzy;
2195 
2196   PetscFunctionBegin;
2197   /* Set the number of nonzeros in the new matrix */
2198   for (i=0; i<m; i++) {
2199     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2200     nzx = xi[i+1] - xi[i];
2201     nzy = yi[i+1] - yi[i];
2202     nnz[i] = 0;
2203     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2204       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2205       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2206       nnz[i]++;
2207     }
2208     for (; k<nzy; k++) nnz[i]++;
2209   }
2210   PetscFunctionReturn(0);
2211 }
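
/*
   Worked example of the merge count above: if in row i the global columns of X
   are {0,2,5} and those of Y are {2,3}, the loop counts the union {0,2,3,5},
   so nnz[i] = 4 (the shared column 2 is skipped as a duplicate and counted once).
*/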
2212 
2213 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2214 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2215 {
2216   PetscErrorCode ierr;
2217   PetscInt       m = Y->rmap->N;
2218   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2219   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2220 
2221   PetscFunctionBegin;
2222   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2227 {
2228   PetscErrorCode ierr;
2229   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2230   PetscBLASInt   bnz,one=1;
2231   Mat_SeqAIJ     *x,*y;
2232 
2233   PetscFunctionBegin;
2234   if (str == SAME_NONZERO_PATTERN) {
2235     PetscScalar alpha = a;
2236     x    = (Mat_SeqAIJ*)xx->A->data;
2237     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2238     y    = (Mat_SeqAIJ*)yy->A->data;
2239     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2240     x    = (Mat_SeqAIJ*)xx->B->data;
2241     y    = (Mat_SeqAIJ*)yy->B->data;
2242     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2243     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2244     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2245     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so in those paths the matrix
2246        on the GPU will be updated; here the values were modified directly, so mark the CPU copy as current */
2247 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2248     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2249       Y->offloadmask = PETSC_OFFLOAD_CPU;
2250     }
2251 #endif
2252   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2253     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2254   } else {
2255     Mat      B;
2256     PetscInt *nnz_d,*nnz_o;
2257     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2258     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2259     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2260     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2261     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2262     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2263     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2264     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2265     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2266     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2267     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2268     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2269     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2270     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2271   }
2272   PetscFunctionReturn(0);
2273 }
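
/*
   Summary of the three cases above:
     SAME_NONZERO_PATTERN   - y->a += a*x->a directly via BLASaxpy on both blocks
     SUBSET_NONZERO_PATTERN - fall back to MatAXPY_Basic()
     otherwise              - count the merged pattern per row, preallocate a new
                              matrix B = Y + a*X, and replace Y's header with B's
*/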
2274 
2275 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2276 
2277 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2278 {
2279 #if defined(PETSC_USE_COMPLEX)
2280   PetscErrorCode ierr;
2281   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2285   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2286 #else
2287   PetscFunctionBegin;
2288 #endif
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2293 {
2294   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2299   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2304 {
2305   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2310   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2315 {
2316   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2317   PetscErrorCode ierr;
2318   PetscInt       i,*idxb = 0;
2319   PetscScalar    *va,*vb;
2320   Vec            vtmp;
2321 
2322   PetscFunctionBegin;
2323   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2324   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2325   if (idx) {
2326     for (i=0; i<A->rmap->n; i++) {
2327       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2328     }
2329   }
2330 
2331   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2332   if (idx) {
2333     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2334   }
2335   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2336   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2337 
2338   for (i=0; i<A->rmap->n; i++) {
2339     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2340       va[i] = vb[i];
2341       if (idx) idx[i] = a->garray[idxb[i]];
2342     }
2343   }
2344 
2345   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2346   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2347   ierr = PetscFree(idxb);CHKERRQ(ierr);
2348   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2353 {
2354   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2355   PetscErrorCode ierr;
2356   PetscInt       i,*idxb = 0;
2357   PetscScalar    *va,*vb;
2358   Vec            vtmp;
2359 
2360   PetscFunctionBegin;
2361   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2362   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2363   if (idx) {
2364     for (i=0; i<A->rmap->n; i++) {
2365       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2366     }
2367   }
2368 
2369   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2370   if (idx) {
2371     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2372   }
2373   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2374   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2375 
2376   for (i=0; i<A->rmap->n; i++) {
2377     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2378       va[i] = vb[i];
2379       if (idx) idx[i] = a->garray[idxb[i]];
2380     }
2381   }
2382 
2383   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2384   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2385   ierr = PetscFree(idxb);CHKERRQ(ierr);
2386   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2393   PetscInt       n      = A->rmap->n;
2394   PetscInt       cstart = A->cmap->rstart;
2395   PetscInt       *cmap  = mat->garray;
2396   PetscInt       *diagIdx, *offdiagIdx;
2397   Vec            diagV, offdiagV;
2398   PetscScalar    *a, *diagA, *offdiagA;
2399   PetscInt       r;
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2405   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2406   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2407   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2408   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2409   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2410   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2411   for (r = 0; r < n; ++r) {
2412     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2413       a[r]   = diagA[r];
2414       idx[r] = cstart + diagIdx[r];
2415     } else {
2416       a[r]   = offdiagA[r];
2417       idx[r] = cmap[offdiagIdx[r]];
2418     }
2419   }
2420   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2421   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2423   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2424   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2425   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2430 {
2431   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2432   PetscInt       n      = A->rmap->n;
2433   PetscInt       cstart = A->cmap->rstart;
2434   PetscInt       *cmap  = mat->garray;
2435   PetscInt       *diagIdx, *offdiagIdx;
2436   Vec            diagV, offdiagV;
2437   PetscScalar    *a, *diagA, *offdiagA;
2438   PetscInt       r;
2439   PetscErrorCode ierr;
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2443   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2445   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2446   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2447   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   for (r = 0; r < n; ++r) {
2451     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2452       a[r]   = diagA[r];
2453       idx[r] = cstart + diagIdx[r];
2454     } else {
2455       a[r]   = offdiagA[r];
2456       idx[r] = cmap[offdiagIdx[r]];
2457     }
2458   }
2459   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2460   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2462   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2463   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2464   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2469 {
2470   PetscErrorCode ierr;
2471   Mat            *dummy;
2472 
2473   PetscFunctionBegin;
2474   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2475   *newmat = *dummy;
2476   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2481 {
2482   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2487   A->factorerrortype = a->A->factorerrortype;
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2492 {
2493   PetscErrorCode ierr;
2494   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2495 
2496   PetscFunctionBegin;
2497   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2498   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2499   if (x->assembled) {
2500     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2501   } else {
2502     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2503   }
2504   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2505   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2510 {
2511   PetscFunctionBegin;
2512   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2513   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 /*@
2518    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2519 
2520    Collective on Mat
2521 
2522    Input Parameters:
2523 +    A - the matrix
2524 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2525 
2526   Level: advanced
2527 
2528 @*/
2529 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2530 {
2531   PetscErrorCode       ierr;
2532 
2533   PetscFunctionBegin;
2534   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
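
/*
   Minimal usage sketch:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or, equivalently, from the options database (handled by
   MatSetFromOptions_MPIAIJ() below):

     -mat_increase_overlap_scalable
*/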
2537 
2538 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2539 {
2540   PetscErrorCode       ierr;
2541   PetscBool            sc = PETSC_FALSE,flg;
2542 
2543   PetscFunctionBegin;
2544   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2545   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2546   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2547   if (flg) {
2548     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2549   }
2550   ierr = PetscOptionsTail();CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2555 {
2556   PetscErrorCode ierr;
2557   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2558   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2559 
2560   PetscFunctionBegin;
2561   if (!Y->preallocated) {
2562     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2563   } else if (!aij->nz) {
2564     PetscInt nonew = aij->nonew;
2565     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2566     aij->nonew = nonew;
2567   }
2568   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2573 {
2574   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2579   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2580   if (d) {
2581     PetscInt rstart;
2582     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2583     *d += rstart;
2584 
2585   }
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2590 {
2591   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2592   PetscErrorCode ierr;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 /* -------------------------------------------------------------------*/
2600 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2601                                        MatGetRow_MPIAIJ,
2602                                        MatRestoreRow_MPIAIJ,
2603                                        MatMult_MPIAIJ,
2604                                 /* 4*/ MatMultAdd_MPIAIJ,
2605                                        MatMultTranspose_MPIAIJ,
2606                                        MatMultTransposeAdd_MPIAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*10*/ 0,
2611                                        0,
2612                                        0,
2613                                        MatSOR_MPIAIJ,
2614                                        MatTranspose_MPIAIJ,
2615                                 /*15*/ MatGetInfo_MPIAIJ,
2616                                        MatEqual_MPIAIJ,
2617                                        MatGetDiagonal_MPIAIJ,
2618                                        MatDiagonalScale_MPIAIJ,
2619                                        MatNorm_MPIAIJ,
2620                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2621                                        MatAssemblyEnd_MPIAIJ,
2622                                        MatSetOption_MPIAIJ,
2623                                        MatZeroEntries_MPIAIJ,
2624                                 /*24*/ MatZeroRows_MPIAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*29*/ MatSetUp_MPIAIJ,
2630                                        0,
2631                                        0,
2632                                        MatGetDiagonalBlock_MPIAIJ,
2633                                        0,
2634                                 /*34*/ MatDuplicate_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*39*/ MatAXPY_MPIAIJ,
2640                                        MatCreateSubMatrices_MPIAIJ,
2641                                        MatIncreaseOverlap_MPIAIJ,
2642                                        MatGetValues_MPIAIJ,
2643                                        MatCopy_MPIAIJ,
2644                                 /*44*/ MatGetRowMax_MPIAIJ,
2645                                        MatScale_MPIAIJ,
2646                                        MatShift_MPIAIJ,
2647                                        MatDiagonalSet_MPIAIJ,
2648                                        MatZeroRowsColumns_MPIAIJ,
2649                                 /*49*/ MatSetRandom_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2655                                        0,
2656                                        MatSetUnfactored_MPIAIJ,
2657                                        MatPermute_MPIAIJ,
2658                                        0,
2659                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2660                                        MatDestroy_MPIAIJ,
2661                                        MatView_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                 /*64*/ 0,
2665                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2670                                        MatGetRowMinAbs_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*75*/ MatFDColoringApply_AIJ,
2676                                        MatSetFromOptions_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        MatFindZeroDiagonals_MPIAIJ,
2680                                 /*80*/ 0,
2681                                        0,
2682                                        0,
2683                                 /*83*/ MatLoad_MPIAIJ,
2684                                        MatIsSymmetric_MPIAIJ,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                 /*89*/ 0,
2690                                        0,
2691                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        MatBindToCPU_MPIAIJ,
2699                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2700                                        0,
2701                                        0,
2702                                        MatConjugate_MPIAIJ,
2703                                        0,
2704                                 /*104*/MatSetValuesRow_MPIAIJ,
2705                                        MatRealPart_MPIAIJ,
2706                                        MatImaginaryPart_MPIAIJ,
2707                                        0,
2708                                        0,
2709                                 /*109*/0,
2710                                        0,
2711                                        MatGetRowMin_MPIAIJ,
2712                                        0,
2713                                        MatMissingDiagonal_MPIAIJ,
2714                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2715                                        0,
2716                                        MatGetGhosts_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                 /*119*/0,
2720                                        0,
2721                                        0,
2722                                        0,
2723                                        MatGetMultiProcBlock_MPIAIJ,
2724                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2725                                        MatGetColumnNorms_MPIAIJ,
2726                                        MatInvertBlockDiagonal_MPIAIJ,
2727                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2728                                        MatCreateSubMatricesMPI_MPIAIJ,
2729                                 /*129*/0,
2730                                        0,
2731                                        0,
2732                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2733                                        0,
2734                                 /*134*/0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                 /*139*/MatSetBlockSizes_MPIAIJ,
2740                                        0,
2741                                        0,
2742                                        MatFDColoringSetUp_MPIXAIJ,
2743                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2744                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2745                                 /*145*/0,
2746                                        0,
2747                                        0
2748 };
2749 
2750 /* ----------------------------------------------------------------------------------------*/
2751 
2752 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2753 {
2754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2759   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2764 {
2765   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2766   PetscErrorCode ierr;
2767 
2768   PetscFunctionBegin;
2769   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2770   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2775 {
2776   Mat_MPIAIJ     *b;
2777   PetscErrorCode ierr;
2778   PetscMPIInt    size;
2779 
2780   PetscFunctionBegin;
2781   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2782   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2783   b = (Mat_MPIAIJ*)B->data;
2784 
2785 #if defined(PETSC_USE_CTABLE)
2786   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2787 #else
2788   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2789 #endif
2790   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2791   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2792   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2793 
2794   /* Because B may have been resized we simply destroy it and create a new one each time */
2795   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2796   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2797   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2798   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
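       /* b->B is given the global number of columns because, until assembly builds the
          compact column map (garray), entries destined for B carry global column indices;
          on a single process it gets 0 columns since B is unused there */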
2799   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2800   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2801   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2802 
2803   if (!B->preallocated) {
2804     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2805     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2806     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2807     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2808     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2809   }
2810 
2811   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2812   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2813   B->preallocated  = PETSC_TRUE;
2814   B->was_assembled = PETSC_FALSE;
2815   B->assembled     = PETSC_FALSE;
2816 
2817   /* Set inode option */
2818   if (b->inode_setoption) {
2819     ierr = MatSetOption(b->A,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr);
2820     ierr = MatSetOption(b->B,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr);
2821   }
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2826 {
2827   Mat_MPIAIJ     *b;
2828   PetscErrorCode ierr;
2829 
2830   PetscFunctionBegin;
2831   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2832   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2834   b = (Mat_MPIAIJ*)B->data;
2835 
2836 #if defined(PETSC_USE_CTABLE)
2837   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2838 #else
2839   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2840 #endif
2841   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2842   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2843   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2844 
2845   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2846   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2847   B->preallocated  = PETSC_TRUE;
2848   B->was_assembled = PETSC_FALSE;
2849   B->assembled = PETSC_FALSE;
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2854 {
2855   Mat            mat;
2856   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2857   PetscErrorCode ierr;
2858 
2859   PetscFunctionBegin;
2860   *newmat = 0;
2861   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2862   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2863   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2864   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2865   a       = (Mat_MPIAIJ*)mat->data;
2866 
2867   mat->factortype   = matin->factortype;
2868   mat->assembled    = matin->assembled;
2869   mat->insertmode   = NOT_SET_VALUES;
2870   mat->preallocated = matin->preallocated;
2871 
2872   a->size         = oldmat->size;
2873   a->rank         = oldmat->rank;
2874   a->donotstash   = oldmat->donotstash;
2875   a->roworiented  = oldmat->roworiented;
2876   a->rowindices   = NULL;
2877   a->rowvalues    = NULL;
2878   a->getrowactive = PETSC_FALSE;
2879 
2880   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2881   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2882 
2883   if (oldmat->colmap) {
2884 #if defined(PETSC_USE_CTABLE)
2885     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2886 #else
2887     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2888     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2889     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2890 #endif
2891   } else a->colmap = NULL;
2892   if (oldmat->garray) {
2893     PetscInt len;
2894     len  = oldmat->B->cmap->n;
2895     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2896     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2897     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2898   } else a->garray = NULL;
2899 
2900   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2901      in fact, MatDuplicate() only requires the matrix to be preallocated.
2902      This can happen inside a DMCreateMatrix_Shell */
2903   if (oldmat->lvec) {
2904     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2905     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2906   }
2907   if (oldmat->Mvctx) {
2908     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2909     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2910   }
2911   if (oldmat->Mvctx_mpi1) {
2912     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2913     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2914   }
2915 
2916   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2917   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2918   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2919   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2920   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2921   *newmat = mat;
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2926 {
2927   PetscBool      isbinary, ishdf5;
2928   PetscErrorCode ierr;
2929 
2930   PetscFunctionBegin;
2931   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2932   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2933   /* force binary viewer to load .info file if it has not yet done so */
2934   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2935   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2936   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2937   if (isbinary) {
2938     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2939   } else if (ishdf5) {
2940 #if defined(PETSC_HAVE_HDF5)
2941     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2942 #else
2943     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2944 #endif
2945   } else {
2946     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2947   }
2948   PetscFunctionReturn(0);
2949 }
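
     /* A minimal usage sketch (hypothetical file name; error checking omitted):
          Mat         A;
          PetscViewer v;
          PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&v);
          MatCreate(PETSC_COMM_WORLD,&A);
          MatSetType(A,MATMPIAIJ);
          MatLoad(A,v);
          PetscViewerDestroy(&v);
     */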
2950 
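     /* The binary format read below consists of (PetscInt unless noted)
          header[4] = {MAT_FILE_CLASSID, M, N, nz}
          rowlens[M]     number of nonzeros in each row
          colidxs[nz]    global column index of each nonzero, row by row
          matvals[nz]    PetscScalar value of each nonzero, in the same order
        Each process reads its slice of rowlens and converts the lengths into CSR row
        offsets with a prefix sum */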
2951 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2952 {
2953   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2954   PetscInt       *rowidxs,*colidxs;
2955   PetscScalar    *matvals;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2960 
2961   /* read in matrix header */
2962   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2963   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2964   M  = header[1]; N = header[2]; nz = header[3];
2965   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2966   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2967   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2968 
2969   /* set block sizes from the viewer's .info file */
2970   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2971   /* set global sizes if not set already */
2972   if (mat->rmap->N < 0) mat->rmap->N = M;
2973   if (mat->cmap->N < 0) mat->cmap->N = N;
2974   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2975   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2976 
2977   /* check if the matrix sizes are correct */
2978   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2979   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2980 
2981   /* read in row lengths and build row indices */
2982   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2983   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2984   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2985   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2986   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2987   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2988   /* read in column indices and matrix values */
2989   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2990   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2991   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2992   /* store matrix indices and values */
2993   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2994   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2995   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 /* Not scalable because of ISAllGather() unless getting all columns. */
3000 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3001 {
3002   PetscErrorCode ierr;
3003   IS             iscol_local;
3004   PetscBool      isstride;
3005   PetscMPIInt    lisstride=0,gisstride;
3006 
3007   PetscFunctionBegin;
3008   /* Check if we are grabbing all columns */
3009   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3010 
3011   if (isstride) {
3012     PetscInt  start,len,mstart,mlen;
3013     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3014     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3015     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3016     if (mstart == start && mlen-mstart == len) lisstride = 1;
3017   }
3018 
3019   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3020   if (gisstride) {
3021     PetscInt N;
3022     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3023     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3024     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3025     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3026   } else {
3027     PetscInt cbs;
3028     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3029     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3030     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3031   }
3032 
3033   *isseq = iscol_local;
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 /*
3038  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3039  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3040 
3041  Input Parameters:
3042    mat - matrix
3043    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3044            i.e., mat->rstart <= isrow[i] < mat->rend
3045    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3046            i.e., mat->cstart <= iscol[i] < mat->cend
3047  Output Parameters:
3048    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3049    iscol_o - sequential column index set for retrieving mat->B
3050    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
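
      Example (a hypothetical 2-process run): if rank 0 owns columns 0..3 of mat and its
      part of iscol selects global columns {1,2,6}, then on rank 0 iscol_d = {1,2} (local
      column indices into mat->A), iscol_o selects the local column of mat->B whose global
      column is 6, and garray[0] = 2 since column 6 is entry 2 of iscol.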
3051  */
3052 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3053 {
3054   PetscErrorCode ierr;
3055   Vec            x,cmap;
3056   const PetscInt *is_idx;
3057   PetscScalar    *xarray,*cmaparray;
3058   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3059   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3060   Mat            B=a->B;
3061   Vec            lvec=a->lvec,lcmap;
3062   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3063   MPI_Comm       comm;
3064   VecScatter     Mvctx=a->Mvctx;
3065 
3066   PetscFunctionBegin;
3067   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3068   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3069 
3070   /* (1) iscol selects a subset of the columns of mat; pad with -1. to form a full-length vector x */
3071   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3072   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3073   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3074   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3075 
3076   /* Get start indices */
3077   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3078   isstart -= ncols;
3079   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3080 
3081   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3082   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3083   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3084   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3085   for (i=0; i<ncols; i++) {
3086     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3087     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3088     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3089   }
3090   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3091   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3092   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3093 
3094   /* Get iscol_d */
3095   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3096   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3097   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3098 
3099   /* Get isrow_d */
3100   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3101   rstart = mat->rmap->rstart;
3102   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3103   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3104   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3105   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3106 
3107   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3108   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3109   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3110 
3111   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3112   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3113   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3114 
3115   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3116 
3117   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3118   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3119 
3120   /* (3) Create sequential iscol_o (a subset of iscol) and garray */
3121   /* off-process column indices */
3122   count = 0;
3123   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3124   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3125 
3126   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3127   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3128   for (i=0; i<Bn; i++) {
3129     if (PetscRealPart(xarray[i]) > -1.0) {
3130       idx[count]     = i;                   /* local column index in off-diagonal part B */
3131       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3132       count++;
3133     }
3134   }
3135   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3136   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3137 
3138   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3139   /* cannot ensure iscol_o has same blocksize as iscol! */
3140 
3141   ierr = PetscFree(idx);CHKERRQ(ierr);
3142   *garray = cmap1;
3143 
3144   ierr = VecDestroy(&x);CHKERRQ(ierr);
3145   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3146   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3147   PetscFunctionReturn(0);
3148 }
3149 
3150 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3151 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3152 {
3153   PetscErrorCode ierr;
3154   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3155   Mat            M = NULL;
3156   MPI_Comm       comm;
3157   IS             iscol_d,isrow_d,iscol_o;
3158   Mat            Asub = NULL,Bsub = NULL;
3159   PetscInt       n;
3160 
3161   PetscFunctionBegin;
3162   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3163 
3164   if (call == MAT_REUSE_MATRIX) {
3165     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3166     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3167     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3168 
3169     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3170     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3171 
3172     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3173     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3174 
3175     /* Update diagonal and off-diagonal portions of submat */
3176     asub = (Mat_MPIAIJ*)(*submat)->data;
3177     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3178     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3179     if (n) {
3180       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3181     }
3182     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3183     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3184 
3185   } else { /* call == MAT_INITIAL_MATRIX */
3186     const PetscInt *garray;
3187     PetscInt        BsubN;
3188 
3189     /* Create isrow_d, iscol_d, iscol_o and garray (replace garray with a plain array?) */
3190     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3191 
3192     /* Create local submatrices Asub and Bsub */
3193     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3194     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3195 
3196     /* Create submatrix M */
3197     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3198 
3199     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3200     asub = (Mat_MPIAIJ*)M->data;
3201 
3202     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3203     n = asub->B->cmap->N;
3204     if (BsubN > n) {
3205       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3206       const PetscInt *idx;
3207       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3208       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3209 
3210       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3211       j = 0;
3212       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3213       for (i=0; i<n; i++) {
3214         if (j >= BsubN) break;
3215         while (subgarray[i] > garray[j]) j++;
3216 
3217         if (subgarray[i] == garray[j]) {
3218           idx_new[i] = idx[j++];
3219         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3220       }
3221       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3222 
3223       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3224       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3225 
3226     } else if (BsubN < n) {
3227       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3228     }
3229 
3230     ierr = PetscFree(garray);CHKERRQ(ierr);
3231     *submat = M;
3232 
3233     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3234     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3235     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3236 
3237     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3238     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3239 
3240     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3241     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3242   }
3243   PetscFunctionReturn(0);
3244 }
3245 
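     /* A typical calling sequence (a sketch; creation of mat, isrow and iscol omitted):
          MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&submat);   caches index sets on submat
          ... change the values (but not the nonzero pattern) of mat ...
          MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&submat);     reuses the cached data
        The PetscObjectCompose()/PetscObjectQuery() calls below implement this caching */
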
3246 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3247 {
3248   PetscErrorCode ierr;
3249   IS             iscol_local=NULL,isrow_d;
3250   PetscInt       csize;
3251   PetscInt       n,i,j,start,end;
3252   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3253   MPI_Comm       comm;
3254 
3255   PetscFunctionBegin;
3256   /* If isrow has same processor distribution as mat,
3257      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3258   if (call == MAT_REUSE_MATRIX) {
3259     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3260     if (isrow_d) {
3261       sameRowDist  = PETSC_TRUE;
3262       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3263     } else {
3264       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3265       if (iscol_local) {
3266         sameRowDist  = PETSC_TRUE;
3267         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3268       }
3269     }
3270   } else {
3271     /* Check if isrow has same processor distribution as mat */
3272     sameDist[0] = PETSC_FALSE;
3273     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3274     if (!n) {
3275       sameDist[0] = PETSC_TRUE;
3276     } else {
3277       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3278       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3279       if (i >= start && j < end) {
3280         sameDist[0] = PETSC_TRUE;
3281       }
3282     }
3283 
3284     /* Check if iscol has same processor distribution as mat */
3285     sameDist[1] = PETSC_FALSE;
3286     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3287     if (!n) {
3288       sameDist[1] = PETSC_TRUE;
3289     } else {
3290       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3291       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3292       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3293     }
3294 
3295     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3296     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3297     sameRowDist = tsameDist[0];
3298   }
3299 
3300   if (sameRowDist) {
3301     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3302       /* isrow and iscol have same processor distribution as mat */
3303       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3304       PetscFunctionReturn(0);
3305     } else { /* sameRowDist */
3306       /* isrow has same processor distribution as mat */
3307       if (call == MAT_INITIAL_MATRIX) {
3308         PetscBool sorted;
3309         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3310         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3311         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3312         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3313 
3314         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3315         if (sorted) {
3316           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3317           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3318           PetscFunctionReturn(0);
3319         }
3320       } else { /* call == MAT_REUSE_MATRIX */
3321         IS    iscol_sub;
3322         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3323         if (iscol_sub) {
3324           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3325           PetscFunctionReturn(0);
3326         }
3327       }
3328     }
3329   }
3330 
3331   /* General case: iscol -> iscol_local which has global size of iscol */
3332   if (call == MAT_REUSE_MATRIX) {
3333     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3334     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3335   } else {
3336     if (!iscol_local) {
3337       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3338     }
3339   }
3340 
3341   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3342   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3343 
3344   if (call == MAT_INITIAL_MATRIX) {
3345     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3346     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3347   }
3348   PetscFunctionReturn(0);
3349 }
3350 
3351 /*@C
3352      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3353          and "off-diagonal" parts of the matrix in CSR format.
3354 
3355    Collective
3356 
3357    Input Parameters:
3358 +  comm - MPI communicator
3359 .  A - "diagonal" portion of matrix
3360 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3361 -  garray - global index of B columns
3362 
3363    Output Parameter:
3364 .   mat - the matrix, with input A as its local diagonal matrix

3365    Level: advanced
3366 
3367    Notes:
3368        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3369        A becomes part of the output mat and B is destroyed by this routine; the user may not use A or B afterwards.
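
            A usage sketch (hypothetical SeqAIJ matrices A and B; garray must list the
          global column of each local column of B):
     $       Mat A,B,C;
     $       PetscInt garray[2] = {0,7};   /* assumed global columns of B's two local columns */
     $       /* ... create SeqAIJ A and B consistent with the layout above ... */
     $       MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);
     $       /* A and B now belong to C; neither may be used or destroyed by the caller */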
3370 
3371 .seealso: MatCreateMPIAIJWithSplitArrays()
3372 @*/
3373 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3374 {
3375   PetscErrorCode ierr;
3376   Mat_MPIAIJ     *maij;
3377   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3378   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3379   PetscScalar    *oa=b->a;
3380   Mat            Bnew;
3381   PetscInt       m,n,N;
3382 
3383   PetscFunctionBegin;
3384   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3385   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3386   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3387   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3388   /* remove check below; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3389   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3390 
3391   /* Get global columns of mat */
3392   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3393 
3394   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3395   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3396   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3397   maij = (Mat_MPIAIJ*)(*mat)->data;
3398 
3399   (*mat)->preallocated = PETSC_TRUE;
3400 
3401   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3402   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3403 
3404   /* Set A as diagonal portion of *mat */
3405   maij->A = A;
3406 
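       /* Convert B's column indices from its local (compact) numbering to the global
          numbering via garray, so that Bnew below can be created with N global columns */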
3407   nz = oi[m];
3408   for (i=0; i<nz; i++) {
3409     col   = oj[i];
3410     oj[i] = garray[col];
3411   }
3412 
3413   /* Set Bnew as off-diagonal portion of *mat */
3414   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3415   bnew        = (Mat_SeqAIJ*)Bnew->data;
3416   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3417   maij->B     = Bnew;
3418 
3419   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3420 
3421   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3422   b->free_a       = PETSC_FALSE;
3423   b->free_ij      = PETSC_FALSE;
3424   ierr = MatDestroy(&B);CHKERRQ(ierr);
3425 
3426   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3427   bnew->free_a       = PETSC_TRUE;
3428   bnew->free_ij      = PETSC_TRUE;
3429 
3430   /* condense columns of maij->B */
3431   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3432   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3433   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3434   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3435   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3436   PetscFunctionReturn(0);
3437 }
3438 
3439 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3440 
3441 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3442 {
3443   PetscErrorCode ierr;
3444   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3445   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3446   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3447   Mat            M,Msub,B=a->B;
3448   MatScalar      *aa;
3449   Mat_SeqAIJ     *aij;
3450   PetscInt       *garray = a->garray,*colsub,Ncols;
3451   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3452   IS             iscol_sub,iscmap;
3453   const PetscInt *is_idx,*cmap;
3454   PetscBool      allcolumns=PETSC_FALSE;
3455   MPI_Comm       comm;
3456 
3457   PetscFunctionBegin;
3458   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3459 
3460   if (call == MAT_REUSE_MATRIX) {
3461     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3462     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3463     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3464 
3465     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3466     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3467 
3468     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3469     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3470 
3471     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3472 
3473   } else { /* call == MAT_INITIAL_MATRIX */
3474     PetscBool flg;
3475 
3476     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3477     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3478 
3479     /* (1) iscol -> nonscalable iscol_local */
3480     /* Check for special case: each processor gets entire matrix columns */
3481     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3482     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3483     if (allcolumns) {
3484       iscol_sub = iscol_local;
3485       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3486       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3487 
3488     } else {
3489       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3490       PetscInt *idx,*cmap1,k;
3491       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3492       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3493       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3494       count = 0;
3495       k     = 0;
3496       for (i=0; i<Ncols; i++) {
3497         j = is_idx[i];
3498         if (j >= cstart && j < cend) {
3499           /* diagonal part of mat */
3500           idx[count]     = j;
3501           cmap1[count++] = i; /* column index in submat */
3502         } else if (Bn) {
3503           /* off-diagonal part of mat */
3504           if (j == garray[k]) {
3505             idx[count]     = j;
3506             cmap1[count++] = i;  /* column index in submat */
3507           } else if (j > garray[k]) {
3508             while (j > garray[k] && k < Bn-1) k++;
3509             if (j == garray[k]) {
3510               idx[count]     = j;
3511               cmap1[count++] = i; /* column index in submat */
3512             }
3513           }
3514         }
3515       }
3516       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3517 
3518       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3519       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3520       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3521 
3522       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3523     }
3524 
3525     /* (3) Create sequential Msub */
3526     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3527   }
3528 
3529   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3530   aij  = (Mat_SeqAIJ*)(Msub)->data;
3531   ii   = aij->i;
3532   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3533 
3534   /*
3535       m - number of local rows
3536       Ncols - number of columns (same on all processors)
3537       rstart - first row in new global matrix generated
3538   */
3539   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3540 
3541   if (call == MAT_INITIAL_MATRIX) {
3542     /* (4) Create parallel newmat */
3543     PetscMPIInt    rank,size;
3544     PetscInt       csize;
3545 
3546     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3547     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3548 
3549     /*
3550         Determine the number of non-zeros in the diagonal and off-diagonal
3551         portions of the matrix in order to do correct preallocation
3552     */
3553 
3554     /* first get start and end of "diagonal" columns */
3555     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3556     if (csize == PETSC_DECIDE) {
3557       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3558       if (mglobal == Ncols) { /* square matrix */
3559         nlocal = m;
3560       } else {
3561         nlocal = Ncols/size + ((Ncols % size) > rank);
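             /* default PETSC_DECIDE split, e.g. Ncols=10 on size=4 gives nlocal = 3,3,2,2 */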
3562       }
3563     } else {
3564       nlocal = csize;
3565     }
3566     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567     rstart = rend - nlocal;
3568     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3569 
3570     /* next, compute all the lengths */
3571     jj    = aij->j;
3572     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3573     olens = dlens + m;
3574     for (i=0; i<m; i++) {
3575       jend = ii[i+1] - ii[i];
3576       olen = 0;
3577       dlen = 0;
3578       for (j=0; j<jend; j++) {
3579         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3580         else dlen++;
3581         jj++;
3582       }
3583       olens[i] = olen;
3584       dlens[i] = dlen;
3585     }
3586 
3587     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3588     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3589 
3590     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3591     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3592     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3593     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3594     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3595     ierr = PetscFree(dlens);CHKERRQ(ierr);
3596 
3597   } else { /* call == MAT_REUSE_MATRIX */
3598     M    = *newmat;
3599     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3600     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3601     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3602     /*
3603          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3604        rather than the slower MatSetValues().
3605     */
3606     M->was_assembled = PETSC_TRUE;
3607     M->assembled     = PETSC_FALSE;
3608   }
3609 
3610   /* (5) Set values of Msub to *newmat */
3611   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3612   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3613 
3614   jj   = aij->j;
3615   aa   = aij->a;
3616   for (i=0; i<m; i++) {
3617     row = rstart + i;
3618     nz  = ii[i+1] - ii[i];
3619     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3620     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3621     jj += nz; aa += nz;
3622   }
3623   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3624 
3625   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3626   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3627 
3628   ierr = PetscFree(colsub);CHKERRQ(ierr);
3629 
3630   /* save Msub, iscol_sub and iscmap used in processor for next request */
3631   if (call ==  MAT_INITIAL_MATRIX) {
3632     *newmat = M;
3633     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3634     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3635 
3636     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3637     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3638 
3639     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3640     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3641 
3642     if (iscol_local) {
3643       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3644       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3645     }
3646   }
3647   PetscFunctionReturn(0);
3648 }
3649 
3650 /*
3651     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3652   matrix on each process, and then the end result formed by concatenating the local
3653   matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3654 
3655   Note: This requires a sequential iscol with all indices.
3656 */
3657 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3658 {
3659   PetscErrorCode ierr;
3660   PetscMPIInt    rank,size;
3661   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3662   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3663   Mat            M,Mreuse;
3664   MatScalar      *aa,*vwork;
3665   MPI_Comm       comm;
3666   Mat_SeqAIJ     *aij;
3667   PetscBool      colflag,allcolumns=PETSC_FALSE;
3668 
3669   PetscFunctionBegin;
3670   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3671   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3672   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3673 
3674   /* Check for special case: each processor gets entire matrix columns */
3675   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3676   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3677   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3678 
3679   if (call ==  MAT_REUSE_MATRIX) {
3680     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3681     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3682     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3683   } else {
3684     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3685   }
3686 
3687   /*
3688       m - number of local rows
3689       n - number of columns (same on all processors)
3690       rstart - first row in new global matrix generated
3691   */
3692   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3693   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3694   if (call == MAT_INITIAL_MATRIX) {
3695     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3696     ii  = aij->i;
3697     jj  = aij->j;
3698 
3699     /*
3700         Determine the number of non-zeros in the diagonal and off-diagonal
3701         portions of the matrix in order to do correct preallocation
3702     */
3703 
3704     /* first get start and end of "diagonal" columns */
3705     if (csize == PETSC_DECIDE) {
3706       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3707       if (mglobal == n) { /* square matrix */
3708         nlocal = m;
3709       } else {
3710         nlocal = n/size + ((n % size) > rank);
3711       }
3712     } else {
3713       nlocal = csize;
3714     }
3715     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3716     rstart = rend - nlocal;
3717     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3718 
3719     /* next, compute all the lengths */
3720     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3721     olens = dlens + m;
3722     for (i=0; i<m; i++) {
3723       jend = ii[i+1] - ii[i];
3724       olen = 0;
3725       dlen = 0;
3726       for (j=0; j<jend; j++) {
3727         if (*jj < rstart || *jj >= rend) olen++;
3728         else dlen++;
3729         jj++;
3730       }
3731       olens[i] = olen;
3732       dlens[i] = dlen;
3733     }
3734     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3735     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3736     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3737     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3738     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3739     ierr = PetscFree(dlens);CHKERRQ(ierr);
3740   } else {
3741     PetscInt ml,nl;
3742 
3743     M    = *newmat;
3744     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3745     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3746     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3747     /*
3748          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3749        rather than the slower MatSetValues().
3750     */
3751     M->was_assembled = PETSC_TRUE;
3752     M->assembled     = PETSC_FALSE;
3753   }
3754   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3755   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3756   ii   = aij->i;
3757   jj   = aij->j;
3758   aa   = aij->a;
3759   for (i=0; i<m; i++) {
3760     row   = rstart + i;
3761     nz    = ii[i+1] - ii[i];
3762     cwork = jj;     jj += nz;
3763     vwork = aa;     aa += nz;
3764     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3765   }
3766 
3767   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3768   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3769   *newmat = M;
3770 
3771   /* save submatrix used in processor for next request */
3772   if (call ==  MAT_INITIAL_MATRIX) {
3773     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3774     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3775   }
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3780 {
3781   PetscInt       m,cstart, cend,j,nnz,i,d;
3782   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3783   const PetscInt *JJ;
3784   PetscErrorCode ierr;
3785   PetscBool      nooffprocentries;
3786 
3787   PetscFunctionBegin;
3788   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3789 
3790   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3791   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3792   m      = B->rmap->n;
3793   cstart = B->cmap->rstart;
3794   cend   = B->cmap->rend;
3795   rstart = B->rmap->rstart;
3796 
3797   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3798 
3799 #if defined(PETSC_USE_DEBUG)
3800   for (i=0; i<m; i++) {
3801     nnz = Ii[i+1]- Ii[i];
3802     JJ  = J + Ii[i];
3803     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3804     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3805     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3806   }
3807 #endif
3808 
3809   for (i=0; i<m; i++) {
3810     nnz     = Ii[i+1]- Ii[i];
3811     JJ      = J + Ii[i];
3812     nnz_max = PetscMax(nnz_max,nnz);
3813     d       = 0;
3814     for (j=0; j<nnz; j++) {
3815       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3816     }
3817     d_nnz[i] = d;
3818     o_nnz[i] = nnz - d;
3819   }
3820   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3821   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3822 
3823   for (i=0; i<m; i++) {
3824     ii   = i + rstart;
3825     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3826   }
3827   nooffprocentries    = B->nooffprocentries;
3828   B->nooffprocentries = PETSC_TRUE;
3829   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   B->nooffprocentries = nooffprocentries;
3832 
3833   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 /*@
3838    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3839    (the default parallel PETSc format).
3840 
3841    Collective
3842 
3843    Input Parameters:
3844 +  B - the matrix
3845 .  i - the indices into j for the start of each local row (starts with zero)
3846 .  j - the column indices for each local row (starts with zero)
3847 -  v - optional values in the matrix
3848 
3849    Level: developer
3850 
3851    Notes:
3852        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3853      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3854      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3855 
3856        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3857 
3858        The format which is used for the sparse matrix input, is equivalent to a
3859     row-major ordering.. i.e for the following matrix, the input data expected is
3860     as shown
3861 
3862 $        1 0 0
3863 $        2 0 3     P0
3864 $       -------
3865 $        4 5 6     P1
3866 $
3867 $     Process0 [P0]: rows_owned=[0,1]
3868 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3869 $        j =  {0,0,2}  [size = 3]
3870 $        v =  {1,2,3}  [size = 3]
3871 $
3872 $     Process1 [P1]: rows_owned=[2]
3873 $        i =  {0,3}    [size = nrow+1  = 1+1]
3874 $        j =  {0,1,2}  [size = 3]
3875 $        v =  {4,5,6}  [size = 3]
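
     A minimal calling sketch for process 0 in the example above (illustrative only;
     error checking is omitted and comm is assumed to hold both processes):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);   // process 0 owns 2 of the 3 rows
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve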
3876 
3877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3878           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3879 @*/
3880 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3881 {
3882   PetscErrorCode ierr;
3883 
3884   PetscFunctionBegin;
3885   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 /*@C
3890    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3891    (the default parallel PETSc format).  For good matrix assembly performance
3892    the user should preallocate the matrix storage by setting the parameters
3893    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3894    performance can be increased by more than a factor of 50.
3895 
3896    Collective
3897 
3898    Input Parameters:
3899 +  B - the matrix
3900 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3901            (same value is used for all local rows)
3902 .  d_nnz - array containing the number of nonzeros in the various rows of the
3903            DIAGONAL portion of the local submatrix (possibly different for each row)
3904            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3905            The size of this array is equal to the number of local rows, i.e 'm'.
3906            For matrices that will be factored, you must leave room for (and set)
3907            the diagonal entry even if it is zero.
3908 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3909            submatrix (same value is used for all local rows).
3910 -  o_nnz - array containing the number of nonzeros in the various rows of the
3911            OFF-DIAGONAL portion of the local submatrix (possibly different for
3912            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3913            structure. The size of this array is equal to the number
3914            of local rows, i.e 'm'.
3915 
3916    If the *_nnz parameter is given then the *_nz parameter is ignored
3917 
3918    The AIJ format (also called the Yale sparse matrix format or
3919    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3920    storage.  The stored row and column indices begin with zero.
3921    See Users-Manual: ch_mat for details.
3922 
3923    The parallel matrix is partitioned such that the first m0 rows belong to
3924    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3925    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3926 
3927    The DIAGONAL portion of the local submatrix of a processor can be defined
3928    as the submatrix which is obtained by extracting the part corresponding to
3929    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3930    first row that belongs to the processor, r2 is the last row belonging to
3931    this processor, and c1-c2 is the range of indices of the local part of a
3932    vector suitable for applying the matrix to. This is an mxn matrix. In the
3933    common case of a square matrix, the row and column ranges are the same and
3934    the DIAGONAL part is also square. The remaining portion of the local
3935    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3936 
3939    You can call MatGetInfo() to get information on how effective the preallocation was;
3940    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3941    You can also run with the option -info and look for messages with the string
3942    malloc in them to see if additional memory allocation was needed.
3943 
3944    Example usage:
3945 
3946    Consider the following 8x8 matrix with 34 non-zero values, that is
3947    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3948    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3949    as follows:
3950 
3951 .vb
3952             1  2  0  |  0  3  0  |  0  4
3953     Proc0   0  5  6  |  7  0  0  |  8  0
3954             9  0 10  | 11  0  0  | 12  0
3955     -------------------------------------
3956            13  0 14  | 15 16 17  |  0  0
3957     Proc1   0 18  0  | 19 20 21  |  0  0
3958             0  0  0  | 22 23  0  | 24  0
3959     -------------------------------------
3960     Proc2  25 26 27  |  0  0 28  | 29  0
3961            30  0  0  | 31 32 33  |  0 34
3962 .ve
3963 
3964    This can be represented as a collection of submatrices as:
3965 
3966 .vb
3967       A B C
3968       D E F
3969       G H I
3970 .ve
3971 
3972    Where the submatrices A,B,C are owned by proc0, D,E,F are
3973    owned by proc1, G,H,I are owned by proc2.
3974 
3975    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3976    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3977    The 'M','N' parameters are 8,8, and have the same values on all procs.
3978 
3979    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3980    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3981    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3982    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3983    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3984    matrix, and [DF] as another SeqAIJ matrix.
3985 
3986    When d_nz, o_nz parameters are specified, d_nz storage elements are
3987    allocated for every row of the local diagonal submatrix, and o_nz
3988    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3989    One way to choose d_nz and o_nz is to use the max nonzeros per local
3990    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3991    In this case, the values of d_nz,o_nz are:
3992 .vb
3993      proc0 : dnz = 2, o_nz = 2
3994      proc1 : dnz = 3, o_nz = 2
3995      proc2 : dnz = 1, o_nz = 4
3996 .ve
3997    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3998    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3999    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4000    34 values.
4001 
4002    When d_nnz, o_nnz parameters are specified, the storage is specified
4003    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4004    In the above case the values for d_nnz,o_nnz are:
4005 .vb
4006      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4007      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4008      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4009 .ve
4010    Here the space allocated is the sum of all the above values, i.e., 34, and
4011    hence the preallocation is perfect.
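
   With d_nnz/o_nnz in hand, the preallocation call on proc0 would look like the
   following sketch (names are illustrative; B is the Mat being set up):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve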
4012 
4013    Level: intermediate
4014 
4015 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4016           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4017 @*/
4018 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4019 {
4020   PetscErrorCode ierr;
4021 
4022   PetscFunctionBegin;
4023   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4024   PetscValidType(B,1);
4025   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4026   PetscFunctionReturn(0);
4027 }
4028 
4029 /*@
4030      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4031          rows in standard CSR format.
4032 
4033    Collective
4034 
4035    Input Parameters:
4036 +  comm - MPI communicator
4037 .  m - number of local rows (Cannot be PETSC_DECIDE)
4038 .  n - This value should be the same as the local size used in creating the
4039        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4040        calculated if N is given) For square matrices n is almost always m.
4041 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4042 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4043 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4044 .   j - column indices
4045 -   a - matrix values
4046 
4047    Output Parameter:
4048 .   mat - the matrix
4049 
4050    Level: intermediate
4051 
4052    Notes:
4053        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4054      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4055      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4056 
4057        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4058 
4059        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4060 
4061        The format used for the sparse matrix input is equivalent to a
4062     row-major ordering, i.e., for the following matrix, the input data expected is
4063     as shown:
4064 
4065 $        1 0 0
4066 $        2 0 3     P0
4067 $       -------
4068 $        4 5 6     P1
4069 $
4070 $     Process0 [P0]: rows_owned=[0,1]
4071 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4072 $        j =  {0,0,2}  [size = 3]
4073 $        v =  {1,2,3}  [size = 3]
4074 $
4075 $     Process1 [P1]: rows_owned=[2]
4076 $        i =  {0,3}    [size = nrow+1  = 1+1]
4077 $        j =  {0,1,2}  [size = 3]
4078 $        v =  {4,5,6}  [size = 3]
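
     A sketch of the collective call for the example above, as issued on process 0
     (process 1 passes its own i, j, and v; names are illustrative only):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve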
4079 
4080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4081           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4082 @*/
4083 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4084 {
4085   PetscErrorCode ierr;
4086 
4087   PetscFunctionBegin;
4088   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4089   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4090   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4091   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4092   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4093   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4094   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4095   PetscFunctionReturn(0);
4096 }
4097 
4098 /*@
4099      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4100          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4101 
4102    Collective
4103 
4104    Input Parameters:
4105 +  mat - the matrix
4106 .  m - number of local rows (Cannot be PETSC_DECIDE)
4107 .  n - This value should be the same as the local size used in creating the
4108        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4109        calculated if N is given) For square matrices n is almost always m.
4110 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4111 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4112 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4113 .  J - column indices
4114 -  v - matrix values
4115 
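   Notes:
     A typical use, sketched here under the assumption that i, j, a, and anew are the
     caller's CSR arrays (i and j must match those used when the matrix was created):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,a,&A);
     // ... compute new numerical values anew with the identical nonzero pattern ...
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,anew);
.ve
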
4116    Level: intermediate
4117 
4118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4119           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4120 @*/
4121 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4122 {
4123   PetscErrorCode ierr;
4124   PetscInt       cstart,nnz,i,j;
4125   PetscInt       *ld;
4126   PetscBool      nooffprocentries;
4127   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4128   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4129   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4130   const PetscInt *Adi = Ad->i;
4131   PetscInt       ldi,Iii,md;
4132 
4133   PetscFunctionBegin;
4134   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4135   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4136   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4137   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4138 
4139   cstart = mat->cmap->rstart;
4140   if (!Aij->ld) {
4141     /* count number of entries below block diagonal */
4142     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4143     Aij->ld = ld;
4144     for (i=0; i<m; i++) {
4145       nnz   = Ii[i+1] - Ii[i];
4146       j     = 0;
4147       while (j < nnz && J[j] < cstart) j++; /* test j < nnz first to avoid reading past the end of J */
4148       J    += nnz;
4149       ld[i] = j;
4150     }
4151   } else {
4152     ld = Aij->ld;
4153   }
4154 
4155   for (i=0; i<m; i++) {
4156     nnz  = Ii[i+1]- Ii[i];
4157     Iii  = Ii[i];
4158     ldi  = ld[i];
4159     md   = Adi[i+1]-Adi[i];
4160     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4161     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4162     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4163     ad  += md;
4164     ao  += nnz - md;
4165   }
4166   nooffprocentries      = mat->nooffprocentries;
4167   mat->nooffprocentries = PETSC_TRUE;
4168   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4169   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4170   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4171   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4172   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4173   mat->nooffprocentries = nooffprocentries;
4174   PetscFunctionReturn(0);
4175 }
4176 
4177 /*@C
4178    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4179    (the default parallel PETSc format).  For good matrix assembly performance
4180    the user should preallocate the matrix storage by setting the parameters
4181    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4182    performance can be increased by more than a factor of 50.
4183 
4184    Collective
4185 
4186    Input Parameters:
4187 +  comm - MPI communicator
4188 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4189            This value should be the same as the local size used in creating the
4190            y vector for the matrix-vector product y = Ax.
4191 .  n - This value should be the same as the local size used in creating the
4192        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4193        calculated if N is given) For square matrices n is almost always m.
4194 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4195 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4196 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4197            (same value is used for all local rows)
4198 .  d_nnz - array containing the number of nonzeros in the various rows of the
4199            DIAGONAL portion of the local submatrix (possibly different for each row)
4200            or NULL, if d_nz is used to specify the nonzero structure.
4201            The size of this array is equal to the number of local rows, i.e 'm'.
4202 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4203            submatrix (same value is used for all local rows).
4204 -  o_nnz - array containing the number of nonzeros in the various rows of the
4205            OFF-DIAGONAL portion of the local submatrix (possibly different for
4206            each row) or NULL, if o_nz is used to specify the nonzero
4207            structure. The size of this array is equal to the number
4208            of local rows, i.e 'm'.
4209 
4210    Output Parameter:
4211 .  A - the matrix
4212 
4213    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4214    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4215    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4216 
4217    Notes:
4218    If the *_nnz parameter is given then the *_nz parameter is ignored
4219 
4220    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4221    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4222    storage requirements for this matrix.
4223 
4224    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4225    processor then it must be used on all processors that share the object for
4226    that argument.
4227 
4228    The user MUST specify either the local or global matrix dimensions
4229    (possibly both).
4230 
4231    The parallel matrix is partitioned across processors such that the
4232    first m0 rows belong to process 0, the next m1 rows belong to
4233    process 1, the next m2 rows belong to process 2, etc., where
4234    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4235    values corresponding to an [m x N] submatrix.
4236 
4237    The columns are logically partitioned with the n0 columns belonging
4238    to 0th partition, the next n1 columns belonging to the next
4239    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4240 
4241    The DIAGONAL portion of the local submatrix on any given processor
4242    is the submatrix corresponding to the rows and columns m,n
4243    corresponding to the given processor, i.e., the diagonal matrix on
4244    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4245    etc. The remaining portion of the local submatrix [m x (N-n)]
4246    constitutes the OFF-DIAGONAL portion. The example below better
4247    illustrates this concept.
4248 
4249    For a square global matrix we define each processor's diagonal portion
4250    to be its local rows and the corresponding columns (a square submatrix);
4251    each processor's off-diagonal portion encompasses the remainder of the
4252    local matrix (a rectangular submatrix).
4253 
4256    When calling this routine with a single process communicator, a matrix of
4257    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4258    type of communicator, use the construction mechanism
4259 .vb
4260      MatCreate(...,&A);
4261      MatSetType(A,MATMPIAIJ);
4262      MatSetSizes(A, m,n,M,N);
4263      MatMPIAIJSetPreallocation(A,...);
4264 .ve
4267 
4268    By default, this format uses inodes (identical nodes) when possible.
4269    We search for consecutive rows with the same nonzero structure, thereby
4270    reusing matrix information to achieve increased efficiency.
4271 
4272    Options Database Keys:
4273 +  -mat_no_inode  - Do not use inodes
4274 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4275 
4278    Example usage:
4279 
4280    Consider the following 8x8 matrix with 34 non-zero values, that is
4281    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4282    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4283    as follows
4284 
4285 .vb
4286             1  2  0  |  0  3  0  |  0  4
4287     Proc0   0  5  6  |  7  0  0  |  8  0
4288             9  0 10  | 11  0  0  | 12  0
4289     -------------------------------------
4290            13  0 14  | 15 16 17  |  0  0
4291     Proc1   0 18  0  | 19 20 21  |  0  0
4292             0  0  0  | 22 23  0  | 24  0
4293     -------------------------------------
4294     Proc2  25 26 27  |  0  0 28  | 29  0
4295            30  0  0  | 31 32 33  |  0 34
4296 .ve
4297 
4298    This can be represented as a collection of submatrices as
4299 
4300 .vb
4301       A B C
4302       D E F
4303       G H I
4304 .ve
4305 
4306    Where the submatrices A,B,C are owned by proc0, D,E,F are
4307    owned by proc1, G,H,I are owned by proc2.
4308 
4309    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4310    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4311    The 'M','N' parameters are 8,8, and have the same values on all procs.
4312 
4313    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4314    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4315    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4316    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4317    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4318    matrix, and [DF] as another SeqAIJ matrix.
4319 
4320    When d_nz, o_nz parameters are specified, d_nz storage elements are
4321    allocated for every row of the local diagonal submatrix, and o_nz
4322    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4323    One way to choose d_nz and o_nz is to use the max nonzeros per local
4324    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4325    In this case, the values of d_nz,o_nz are
4326 .vb
4327      proc0 : dnz = 2, o_nz = 2
4328      proc1 : dnz = 3, o_nz = 2
4329      proc2 : dnz = 1, o_nz = 4
4330 .ve
4331    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4332    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4333    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4334    34 values.
4335 
4336    When d_nnz, o_nnz parameters are specified, the storage is specified
4337    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4338    In the above case the values for d_nnz,o_nnz are
4339 .vb
4340      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4341      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4342      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4343 .ve
4344    Here the space allocated is the sum of all the above values, i.e., 34, and
4345    hence the preallocation is perfect.
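
   With the example's per-row counts, proc0 could create its share of the matrix as in
   the following sketch (the other processes make the matching collective call with
   their own sizes and counts; names are illustrative only):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve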
4346 
4347    Level: intermediate
4348 
4349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4350           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4351 @*/
4352 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4353 {
4354   PetscErrorCode ierr;
4355   PetscMPIInt    size;
4356 
4357   PetscFunctionBegin;
4358   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4359   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4360   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4361   if (size > 1) {
4362     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4363     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4364   } else {
4365     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4366     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4367   }
4368   PetscFunctionReturn(0);
4369 }
4370 
4371 /*@C
4372   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4373 
4374   Not collective
4375 
4376   Input Parameter:
4377 . A - The MPIAIJ matrix
4378 
4379   Output Parameters:
4380 + Ad - The local diagonal block as a SeqAIJ matrix
4381 . Ao - The local off-diagonal block as a SeqAIJ matrix
4382 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4383 
4384   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4385   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4386   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4387   local column numbers to global column numbers in the original matrix.
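
  A minimal retrieval sketch (illustrative; colmap is only meaningful in parallel runs):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     // the global column of local column c of Ao is colmap[c]
.ve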
4388 
4389   Level: intermediate
4390 
4391 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4392 @*/
4393 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4394 {
4395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4396   PetscBool      flg;
4397   PetscErrorCode ierr;
4398 
4399   PetscFunctionBegin;
4400   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4401   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4402   if (Ad)     *Ad     = a->A;
4403   if (Ao)     *Ao     = a->B;
4404   if (colmap) *colmap = a->garray;
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4409 {
4410   PetscErrorCode ierr;
4411   PetscInt       m,N,i,rstart,nnz,Ii;
4412   PetscInt       *indx;
4413   PetscScalar    *values;
4414 
4415   PetscFunctionBegin;
4416   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4417   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4418     PetscInt       *dnz,*onz,sum,bs,cbs;
4419 
4420     if (n == PETSC_DECIDE) {
4421       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4422     }
4423     /* Check sum(n) = N */
4424     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4425     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4426 
4427     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4428     rstart -= m;
4429 
4430     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4431     for (i=0; i<m; i++) {
4432       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4433       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4434       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4435     }
4436 
4437     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4438     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4439     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4440     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4441     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4442     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4443     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4444     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4445   }
4446 
4447   /* numeric phase */
4448   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4449   for (i=0; i<m; i++) {
4450     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4451     Ii   = i + rstart;
4452     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4453     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4454   }
4455   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4456   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4461 {
4462   PetscErrorCode    ierr;
4463   PetscMPIInt       rank;
4464   PetscInt          m,N,i,rstart,nnz;
4465   size_t            len;
4466   const PetscInt    *indx;
4467   PetscViewer       out;
4468   char              *name;
4469   Mat               B;
4470   const PetscScalar *values;
4471 
4472   PetscFunctionBegin;
4473   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4474   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4475   /* Should this be the type of the diagonal block of A? */
4476   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4477   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4478   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4479   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4480   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4481   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4482   for (i=0; i<m; i++) {
4483     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4484     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4485     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4486   }
4487   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4489 
4490   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4491   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4492   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4493   sprintf(name,"%s.%d",outfile,rank);
4494   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4495   ierr = PetscFree(name);CHKERRQ(ierr);
4496   ierr = MatView(B,out);CHKERRQ(ierr);
4497   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4498   ierr = MatDestroy(&B);CHKERRQ(ierr);
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4503 {
4504   PetscErrorCode      ierr;
4505   Mat_Merge_SeqsToMPI *merge;
4506   PetscContainer      container;
4507 
4508   PetscFunctionBegin;
4509   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4510   if (container) {
4511     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4513     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4514     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4515     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4516     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4517     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4518     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4519     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4520     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4521     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4522     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4524     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4525     ierr = PetscFree(merge);CHKERRQ(ierr);
4526     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4527   }
4528   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 #include <../src/mat/utils/freespace.h>
4533 #include <petscbt.h>
4534 
4535 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4536 {
4537   PetscErrorCode      ierr;
4538   MPI_Comm            comm;
4539   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4540   PetscMPIInt         size,rank,taga,*len_s;
4541   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4542   PetscInt            proc,m;
4543   PetscInt            **buf_ri,**buf_rj;
4544   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4545   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4546   MPI_Request         *s_waits,*r_waits;
4547   MPI_Status          *status;
4548   MatScalar           *aa=a->a;
4549   MatScalar           **abuf_r,*ba_i;
4550   Mat_Merge_SeqsToMPI *merge;
4551   PetscContainer      container;
4552 
4553   PetscFunctionBegin;
4554   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4555   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4556 
4557   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4558   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4559 
4560   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4561   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4562 
4563   bi     = merge->bi;
4564   bj     = merge->bj;
4565   buf_ri = merge->buf_ri;
4566   buf_rj = merge->buf_rj;
4567 
4568   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4569   owners = merge->rowmap->range;
4570   len_s  = merge->len_s;
4571 
4572   /* send and recv matrix values */
4573   /*-----------------------------*/
4574   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4575   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4576 
4577   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4578   for (proc=0,k=0; proc<size; proc++) {
4579     if (!len_s[proc]) continue;
4580     i    = owners[proc];
4581     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4582     k++;
4583   }
4584 
4585   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4586   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4587   ierr = PetscFree(status);CHKERRQ(ierr);
4588 
4589   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4590   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4591 
4592   /* insert mat values of mpimat */
4593   /*----------------------------*/
4594   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4595   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4596 
4597   for (k=0; k<merge->nrecv; k++) {
4598     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4599     nrows       = *(buf_ri_k[k]);
4600     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4601     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4602   }
4603 
4604   /* set values of ba */
4605   m = merge->rowmap->n;
4606   for (i=0; i<m; i++) {
4607     arow = owners[rank] + i;
4608     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4609     bnzi = bi[i+1] - bi[i];
4610     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4611 
4612     /* add local non-zero vals of this proc's seqmat into ba */
4613     anzi   = ai[arow+1] - ai[arow];
4614     aj     = a->j + ai[arow];
4615     aa     = a->a + ai[arow];
4616     nextaj = 0;
4617     for (j=0; nextaj<anzi; j++) {
4618       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4619         ba_i[j] += aa[nextaj++];
4620       }
4621     }
4622 
4623     /* add received vals into ba */
4624     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4625       /* i-th row */
4626       if (i == *nextrow[k]) {
4627         anzi   = *(nextai[k]+1) - *nextai[k];
4628         aj     = buf_rj[k] + *(nextai[k]);
4629         aa     = abuf_r[k] + *(nextai[k]);
4630         nextaj = 0;
4631         for (j=0; nextaj<anzi; j++) {
4632           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4633             ba_i[j] += aa[nextaj++];
4634           }
4635         }
4636         nextrow[k]++; nextai[k]++;
4637       }
4638     }
4639     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4640   }
4641   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4642   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4643 
4644   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4645   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4646   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4647   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4648   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4653 {
4654   PetscErrorCode      ierr;
4655   Mat                 B_mpi;
4656   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4657   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4658   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4659   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4660   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4661   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4662   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4663   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4664   MPI_Status          *status;
4665   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4666   PetscBT             lnkbt;
4667   Mat_Merge_SeqsToMPI *merge;
4668   PetscContainer      container;
4669 
4670   PetscFunctionBegin;
4671   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4672 
4673   /* make sure it is a PETSc comm */
4674   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4677 
4678   ierr = PetscNew(&merge);CHKERRQ(ierr);
4679   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4680 
4681   /* determine row ownership */
4682   /*---------------------------------------------------------*/
4683   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4684   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4685   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4686   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4687   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4688   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4689   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4690 
4691   m      = merge->rowmap->n;
4692   owners = merge->rowmap->range;
4693 
4694   /* determine the number of messages to send, their lengths */
4695   /*---------------------------------------------------------*/
4696   len_s = merge->len_s;
4697 
4698   len          = 0; /* length of buf_si[] */
4699   merge->nsend = 0;
4700   for (proc=0; proc<size; proc++) {
4701     len_si[proc] = 0;
4702     if (proc == rank) {
4703       len_s[proc] = 0;
4704     } else {
4705       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4706       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4707     }
4708     if (len_s[proc]) {
4709       merge->nsend++;
4710       nrows = 0;
4711       for (i=owners[proc]; i<owners[proc+1]; i++) {
4712         if (ai[i+1] > ai[i]) nrows++;
4713       }
4714       len_si[proc] = 2*(nrows+1);
4715       len         += len_si[proc];
4716     }
4717   }
4718 
4719   /* determine the number and length of messages to receive for ij-structure */
4720   /*-------------------------------------------------------------------------*/
4721   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4722   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4723 
4724   /* post the Irecv of j-structure */
4725   /*-------------------------------*/
4726   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4727   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4728 
4729   /* post the Isend of j-structure */
4730   /*--------------------------------*/
4731   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4732 
4733   for (proc=0, k=0; proc<size; proc++) {
4734     if (!len_s[proc]) continue;
4735     i    = owners[proc];
4736     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4737     k++;
4738   }
4739 
4740   /* receives and sends of j-structure are complete */
4741   /*------------------------------------------------*/
4742   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4743   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4744 
4745   /* send and recv i-structure */
4746   /*---------------------------*/
4747   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4748   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4749 
4750   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4751   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4752   for (proc=0,k=0; proc<size; proc++) {
4753     if (!len_s[proc]) continue;
4754     /* form outgoing message for i-structure:
4755          buf_si[0]:                 nrows to be sent
4756                [1:nrows]:           row index (global)
4757                [nrows+1:2*nrows+1]: i-structure index
4758     */
4759     /*-------------------------------------------*/
4760     nrows       = len_si[proc]/2 - 1;
4761     buf_si_i    = buf_si + nrows+1;
4762     buf_si[0]   = nrows;
4763     buf_si_i[0] = 0;
4764     nrows       = 0;
4765     for (i=owners[proc]; i<owners[proc+1]; i++) {
4766       anzi = ai[i+1] - ai[i];
4767       if (anzi) {
4768         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4769         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4770         nrows++;
4771       }
4772     }
4773     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4774     k++;
4775     buf_si += len_si[proc];
4776   }
4777 
4778   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4779   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4780 
4781   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4782   for (i=0; i<merge->nrecv; i++) {
4783     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4784   }
4785 
4786   ierr = PetscFree(len_si);CHKERRQ(ierr);
4787   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4788   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4789   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4790   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4791   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4792   ierr = PetscFree(status);CHKERRQ(ierr);
4793 
4794   /* compute a local seq matrix in each processor */
4795   /*----------------------------------------------*/
4796   /* allocate bi array and free space for accumulating nonzero column info */
4797   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4798   bi[0] = 0;
4799 
4800   /* create and initialize a linked list */
4801   nlnk = N+1;
4802   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803 
4804   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4805   len  = ai[owners[rank+1]] - ai[owners[rank]];
4806   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4807 
4808   current_space = free_space;
4809 
4810   /* determine symbolic info for each local row */
4811   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4812 
4813   for (k=0; k<merge->nrecv; k++) {
4814     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4815     nrows       = *buf_ri_k[k];
4816     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4817     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4818   }
4819 
4820   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4821   len  = 0;
4822   for (i=0; i<m; i++) {
4823     bnzi = 0;
4824     /* add local non-zero cols of this proc's seqmat into lnk */
4825     arow  = owners[rank] + i;
4826     anzi  = ai[arow+1] - ai[arow];
4827     aj    = a->j + ai[arow];
4828     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4829     bnzi += nlnk;
4830     /* add received col data into lnk */
4831     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4832       if (i == *nextrow[k]) { /* i-th row */
4833         anzi  = *(nextai[k]+1) - *nextai[k];
4834         aj    = buf_rj[k] + *nextai[k];
4835         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4836         bnzi += nlnk;
4837         nextrow[k]++; nextai[k]++;
4838       }
4839     }
4840     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4841 
4842     /* if free space is not available, make more free space */
4843     if (current_space->local_remaining<bnzi) {
4844       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4845       nspacedouble++;
4846     }
4847     /* copy data into free space, then initialize lnk */
4848     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4849     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4850 
4851     current_space->array           += bnzi;
4852     current_space->local_used      += bnzi;
4853     current_space->local_remaining -= bnzi;
4854 
4855     bi[i+1] = bi[i] + bnzi;
4856   }
4857 
4858   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4859 
4860   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4861   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4862   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4863 
4864   /* create symbolic parallel matrix B_mpi */
4865   /*---------------------------------------*/
4866   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4867   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4868   if (n==PETSC_DECIDE) {
4869     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4870   } else {
4871     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4872   }
4873   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4874   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4875   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4876   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4877   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4878 
4879   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4880   B_mpi->assembled    = PETSC_FALSE;
4881   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4882   merge->bi           = bi;
4883   merge->bj           = bj;
4884   merge->buf_ri       = buf_ri;
4885   merge->buf_rj       = buf_rj;
4886   merge->coi          = NULL;
4887   merge->coj          = NULL;
4888   merge->owners_co    = NULL;
4889 
4890   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4891 
4892   /* attach the supporting struct to B_mpi for reuse */
4893   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4894   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4895   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4896   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4897   *mpimat = B_mpi;
4898 
4899   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 /*@C
4904       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4905                  matrices from each processor
4906 
4907     Collective
4908 
4909    Input Parameters:
4910 +    comm - the communicator the parallel matrix will live on
4911 .    seqmat - the input sequential matrix on each processor
4912 .    m - number of local rows (or PETSC_DECIDE)
4913 .    n - number of local columns (or PETSC_DECIDE)
4914 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4915 
4916    Output Parameter:
4917 .    mpimat - the parallel matrix generated
4918 
4919     Level: advanced
4920 
4921    Notes:
4922      The dimensions of the sequential matrix in each processor MUST be the same.
4923      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4924      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
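
     A usage sketch (illustrative): a first call with MAT_INITIAL_MATRIX builds the symbolic
     structure and sums the values; later calls with MAT_REUSE_MATRIX redo only the numeric sum.

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     // ... change the numerical values of seqmat, keeping its nonzero pattern ...
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve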
4925 @*/
4926 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4927 {
4928   PetscErrorCode ierr;
4929   PetscMPIInt    size;
4930 
4931   PetscFunctionBegin;
4932   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4933   if (size == 1) {
4934     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935     if (scall == MAT_INITIAL_MATRIX) {
4936       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4937     } else {
4938       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4939     }
4940     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4941     PetscFunctionReturn(0);
4942   }
4943   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4944   if (scall == MAT_INITIAL_MATRIX) {
4945     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4946   }
4947   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4948   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 /*@
4953      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4954           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4955           with MatGetSize()
4956 
4957     Not Collective
4958 
4959    Input Parameters:
4960 +    A - the matrix
4961 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4962 
4963    Output Parameter:
4964 .    A_loc - the local sequential matrix generated
4965 
4966     Level: developer
4967 
4968    Notes:
4969      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4970      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4971      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4972      modify the values of the returned A_loc.
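
     A reuse sketch (illustrative):

.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     // ... the numerical values of A change, same nonzero pattern ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve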
4973 
4974 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4975 
4976 @*/
4977 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4978 {
4979   PetscErrorCode ierr;
4980   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4981   Mat_SeqAIJ     *mat,*a,*b;
4982   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4983   MatScalar      *aa,*ba,*cam;
4984   PetscScalar    *ca;
4985   PetscMPIInt    size;
4986   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4987   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4988   PetscBool      match;
4989 
4990   PetscFunctionBegin;
4991   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4992   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4993   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4994   if (size == 1) {
4995     if (scall == MAT_INITIAL_MATRIX) {
4996       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4997       *A_loc = mpimat->A;
4998     } else if (scall == MAT_REUSE_MATRIX) {
4999       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5000     }
5001     PetscFunctionReturn(0);
5002   }
5003 
5004   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5005   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5006   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5007   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5008   aa = a->a; ba = b->a;
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5011     ci[0] = 0;
5012     for (i=0; i<am; i++) {
5013       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5014     }
5015     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5016     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5017     k    = 0;
5018     for (i=0; i<am; i++) {
5019       ncols_o = bi[i+1] - bi[i];
5020       ncols_d = ai[i+1] - ai[i];
5021       /* off-diagonal portion of A */
5022       for (jo=0; jo<ncols_o; jo++) {
5023         col = cmap[*bj];
5024         if (col >= cstart) break;
5025         cj[k]   = col; bj++;
5026         ca[k++] = *ba++;
5027       }
5028       /* diagonal portion of A */
5029       for (j=0; j<ncols_d; j++) {
5030         cj[k]   = cstart + *aj++;
5031         ca[k++] = *aa++;
5032       }
5033       /* off-diagonal portion of A */
5034       for (j=jo; j<ncols_o; j++) {
5035         cj[k]   = cmap[*bj++];
5036         ca[k++] = *ba++;
5037       }
5038     }
5039     /* put together the new matrix */
5040     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5041     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5042     /* Since these are PETSc arrays, change flags to free them as necessary. */
5043     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5044     mat->free_a  = PETSC_TRUE;
5045     mat->free_ij = PETSC_TRUE;
5046     mat->nonew   = 0;
5047   } else if (scall == MAT_REUSE_MATRIX) {
5048     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5049     ci = mat->i; cj = mat->j; cam = mat->a;
5050     for (i=0; i<am; i++) {
5051       /* off-diagonal portion of A */
5052       ncols_o = bi[i+1] - bi[i];
5053       for (jo=0; jo<ncols_o; jo++) {
5054         col = cmap[*bj];
5055         if (col >= cstart) break;
5056         *cam++ = *ba++; bj++;
5057       }
5058       /* diagonal portion of A */
5059       ncols_d = ai[i+1] - ai[i];
5060       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5061       /* off-diagonal portion of A */
5062       for (j=jo; j<ncols_o; j++) {
5063         *cam++ = *ba++; bj++;
5064       }
5065     }
5066   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5067   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5068   PetscFunctionReturn(0);
5069 }
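
/*
   A minimal usage sketch for MatMPIAIJGetLocalMat following the Notes above (the names
   A and Aloc are hypothetical, not part of the PETSc API): create the local matrix once
   with MAT_INITIAL_MATRIX, then refresh its values with MAT_REUSE_MATRIX after the
   values of A have changed.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     (change values of A and reassemble it here)
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/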
5070 
5071 /*@C
5072      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5073 
5074     Not Collective
5075 
5076    Input Parameters:
5077 +    A - the matrix
5078 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5079 -    row, col - index sets of rows and columns to extract (or NULL)
5080 
5081    Output Parameter:
5082 .    A_loc - the local sequential matrix generated
5083 
5084     Level: developer
5085 
5086 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5087 
5088 @*/
5089 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5090 {
5091   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5092   PetscErrorCode ierr;
5093   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5094   IS             isrowa,iscola;
5095   Mat            *aloc;
5096   PetscBool      match;
5097 
5098   PetscFunctionBegin;
5099   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5100   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5101   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5102   if (!row) {
5103     start = A->rmap->rstart; end = A->rmap->rend;
5104     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5105   } else {
5106     isrowa = *row;
5107   }
5108   if (!col) {
5109     start = A->cmap->rstart;
5110     cmap  = a->garray;
5111     nzA   = a->A->cmap->n;
5112     nzB   = a->B->cmap->n;
5113     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5114     ncols = 0;
5115     for (i=0; i<nzB; i++) {
5116       if (cmap[i] < start) idx[ncols++] = cmap[i];
5117       else break;
5118     }
5119     imark = i;
5120     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5121     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5122     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5123   } else {
5124     iscola = *col;
5125   }
5126   if (scall != MAT_INITIAL_MATRIX) {
5127     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5128     aloc[0] = *A_loc;
5129   }
5130   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5131   if (!col) { /* attach global id of condensed columns */
5132     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5133   }
5134   *A_loc = aloc[0];
5135   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5136   if (!row) {
5137     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5138   }
5139   if (!col) {
5140     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5141   }
5142   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5143   PetscFunctionReturn(0);
5144 }
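
/*
   A minimal usage sketch (hypothetical names): passing NULL for both row and col selects
   all local rows and all nonzero columns, as described in the manual page above.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     (use Aloc; after the values of A change, refresh with)
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/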
5145 
5146 /*
5147  * Destroy a mat that may be composed with PetscSF communication objects.
5148  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5149  * */
5150 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5151 {
5152   PetscSF          sf,osf;
5153   IS               map;
5154   PetscErrorCode   ierr;
5155 
5156   PetscFunctionBegin;
5157   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5158   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5159   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5160   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5161   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5162   ierr = ISDestroy(&map);CHKERRQ(ierr);
5163   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5164   PetscFunctionReturn(0);
5165 }
5166 
5167 /*
5168  * Create a sequential AIJ matrix based on row indices: once a row is matched, all of its columns are extracted.
5169  * Rows can be local or remote. The routine is designed to be memory scalable, so that nothing is allocated based
5170  * on a global size.
5171  * */
5172 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5173 {
5174   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5175   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5176   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5177   PetscMPIInt              owner;
5178   PetscSFNode              *iremote,*oiremote;
5179   const PetscInt           *lrowindices;
5180   PetscErrorCode           ierr;
5181   PetscSF                  sf,osf;
5182   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5183   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5184   MPI_Comm                 comm;
5185   ISLocalToGlobalMapping   mapping;
5186 
5187   PetscFunctionBegin;
5188   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5189   /* plocalsize is the number of roots
5190    * nrows is the number of leaves
5191    * */
5192   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5193   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5194   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5195   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5196   for (i=0;i<nrows;i++) {
5197     /* Find a remote index and an owner for a row
5198      * The row could be local or remote
5199      * */
5200     owner = 0;
5201     lidx  = 0;
5202     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5203     iremote[i].index = lidx;
5204     iremote[i].rank  = owner;
5205   }
5206   /* Create SF to communicate how many nonzero columns for each row */
5207   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5208   /* SF will figure out the number of nonzero columns for each row, and their
5209    * offsets
5210    * */
5211   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5212   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5213   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5214 
5215   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5216   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5217   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5218   roffsets[0] = 0;
5219   roffsets[1] = 0;
5220   for (i=0;i<plocalsize;i++) {
5221     /* diag */
5222     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5223     /* off diag */
5224     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5225     /* compute offsets so that we know the relative location of each row */
5226     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5227     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5228   }
5229   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5230   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5231   /* 'r' means root, and 'l' means leaf */
5232   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5233   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5234   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5235   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5236   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5237   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5238   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5239   dntotalcols = 0;
5240   ontotalcols = 0;
5241   ncol = 0;
5242   for (i=0;i<nrows;i++) {
5243     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5244     ncol = PetscMax(pnnz[i],ncol);
5245     /* diag */
5246     dntotalcols += nlcols[i*2+0];
5247     /* off diag */
5248     ontotalcols += nlcols[i*2+1];
5249   }
5250   /* We do not need to figure out the right number of columns
5251    * since all the calculations will be done by going through the raw data
5252    * */
5253   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5254   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5255   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5256   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5257   /* diag */
5258   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5259   /* off diag */
5260   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5261   /* diag */
5262   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5263   /* off diag */
5264   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5265   dntotalcols = 0;
5266   ontotalcols = 0;
5267   ntotalcols  = 0;
5268   for (i=0;i<nrows;i++) {
5269     owner = 0;
5270     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5271     /* Set iremote for diag matrix */
5272     for (j=0;j<nlcols[i*2+0];j++) {
5273       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5274       iremote[dntotalcols].rank    = owner;
5275       /* P_oth is SeqAIJ, so ilocal needs to point into the first part of the memory */
5276       ilocal[dntotalcols++]        = ntotalcols++;
5277     }
5278     /* off diag */
5279     for (j=0;j<nlcols[i*2+1];j++) {
5280       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5281       oiremote[ontotalcols].rank    = owner;
5282       oilocal[ontotalcols++]        = ntotalcols++;
5283     }
5284   }
5285   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5286   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5287   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5288   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5289   /* P serves as roots and P_oth serves as leaves
5290    * Diag matrix
5291    * */
5292   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5293   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5294   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5295 
5296   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5297   /* Off diag */
5298   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5299   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5300   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5301   /* We operate on the matrix internal data to save memory */
5302   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5303   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5304   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5305   /* Convert to global indices for diag matrix */
5306   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5307   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5308   /* We want P_oth to store global indices */
5309   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5310   /* Use memory scalable approach */
5311   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5312   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5313   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5314   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5315   /* Convert back to local indices */
5316   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5317   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5318   nout = 0;
5319   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5320   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5321   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5322   /* Exchange values */
5323   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5324   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5325   /* Stop PETSc from shrinking memory */
5326   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5327   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5328   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5329   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5330   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5331   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5332   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5333   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5334   PetscFunctionReturn(0);
5335 }
5336 
5337 /*
5338  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A.
5339  * This supports MPIAIJ and MAIJ.
5340  * */
5341 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5342 {
5343   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5344   Mat_SeqAIJ            *p_oth;
5345   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5346   IS                    rows,map;
5347   PetscHMapI            hamp;
5348   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5349   MPI_Comm              comm;
5350   PetscSF               sf,osf;
5351   PetscBool             has;
5352   PetscErrorCode        ierr;
5353 
5354   PetscFunctionBegin;
5355   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5356   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5357   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5358    *  and then create a submatrix (that often is an overlapping matrix)
5359    * */
5360   if (reuse==MAT_INITIAL_MATRIX) {
5361     /* Use a hash table to figure out unique keys */
5362     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5363     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5364     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5365     count = 0;
5366     /* Assume that a->garray is sorted; otherwise the following does not make sense */
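    /* Worked example (illustrative values, not from the source): with dof = 2 and
     * a->garray = {2,3,6,7}, the keys are {1,1,3,3}, so mapping becomes {0,0,1,1}
     * and count ends at 2. */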
5367     for (i=0;i<a->B->cmap->n;i++) {
5368       key  = a->garray[i]/dof;
5369       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5370       if (!has) {
5371         mapping[i] = count;
5372         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5373       } else {
5374         /* The current 'i' maps to the same key as the previous step */
5375         mapping[i] = count-1;
5376       }
5377     }
5378     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5379     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5380     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5381     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5382     off = 0;
5383     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5384     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5385     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5386     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5387     /* In case the matrix was already created, but the user wants to recreate it */
5388     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5389     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5390     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5391     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5392   } else if (reuse==MAT_REUSE_MATRIX) {
5393     /* If the matrix was already created, we simply update the values using the SF objects
5394      * that were attached to the matrix earlier.
5395      *  */
5396     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5397     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5398     if (!sf || !osf) {
5399       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5400     }
5401     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5402     /* Update values in place */
5403     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5404     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5405     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5406     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5407   } else {
5408     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5409   }
5410   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5411   PetscFunctionReturn(0);
5412 }
5413 
5414 /*@C
5415     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5416 
5417     Collective on Mat
5418 
5419    Input Parameters:
5420 +    A,B - the matrices in mpiaij format
5421 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5422 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5423 
5424    Output Parameters:
5425 +    rowb, colb - index sets of rows and columns of B to extract
5426 -    B_seq - the sequential matrix generated
5427 
5428     Level: developer
5429 
5430 @*/
5431 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5432 {
5433   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5434   PetscErrorCode ierr;
5435   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5436   IS             isrowb,iscolb;
5437   Mat            *bseq=NULL;
5438 
5439   PetscFunctionBegin;
5440   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5441     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5442   }
5443   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5444 
5445   if (scall == MAT_INITIAL_MATRIX) {
5446     start = A->cmap->rstart;
5447     cmap  = a->garray;
5448     nzA   = a->A->cmap->n;
5449     nzB   = a->B->cmap->n;
5450     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5451     ncols = 0;
5452     for (i=0; i<nzB; i++) {  /* row < local row index */
5453       if (cmap[i] < start) idx[ncols++] = cmap[i];
5454       else break;
5455     }
5456     imark = i;
5457     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5458     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5459     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5460     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5461   } else {
5462     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5463     isrowb  = *rowb; iscolb = *colb;
5464     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5465     bseq[0] = *B_seq;
5466   }
5467   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5468   *B_seq = bseq[0];
5469   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5470   if (!rowb) {
5471     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5472   } else {
5473     *rowb = isrowb;
5474   }
5475   if (!colb) {
5476     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5477   } else {
5478     *colb = iscolb;
5479   }
5480   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5481   PetscFunctionReturn(0);
5482 }
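
/*
   A minimal usage sketch (hypothetical names): on the first call the index sets and the
   sequential matrix are created; the same objects must be passed back for the reuse call.

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     (update the values of B, then)
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/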
5483 
5484 /*
5485     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5486     of the OFF-DIAGONAL portion of local A
5487 
5488     Collective on Mat
5489 
5490    Input Parameters:
5491 +    A,B - the matrices in mpiaij format
5492 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5493 
5494    Output Parameters:
5495 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5496 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5497 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5498 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5499 
5500     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5501      for this matrix. This is not desirable.
5502 
5503     Level: developer
5504 
5505 */
5506 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5507 {
5508   PetscErrorCode         ierr;
5509   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5510   Mat_SeqAIJ             *b_oth;
5511   VecScatter             ctx;
5512   MPI_Comm               comm;
5513   const PetscMPIInt      *rprocs,*sprocs;
5514   const PetscInt         *srow,*rstarts,*sstarts;
5515   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5516   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5517   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5518   MPI_Request            *rwaits = NULL,*swaits = NULL;
5519   MPI_Status             rstatus;
5520   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5521 
5522   PetscFunctionBegin;
5523   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5525 
5526   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5527     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5528   }
5529   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5530   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5531 
5532   if (size == 1) {
5533     if (startsj_s) *startsj_s = NULL;
5534     if (bufa_ptr)  *bufa_ptr  = NULL;
5535     *B_oth    = NULL;
5536     PetscFunctionReturn(0);
5537   }
5538 
5539   ctx = a->Mvctx;
5540   tag = ((PetscObject)ctx)->tag;
5541 
5542   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5543   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5544   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5545   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5546   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5547   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5548   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5549 
5550   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5551   if (scall == MAT_INITIAL_MATRIX) {
5552     /* i-array */
5553     /*---------*/
5554     /*  post receives */
5555     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5556     for (i=0; i<nrecvs; i++) {
5557       rowlen = rvalues + rstarts[i]*rbs;
5558       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5559       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5560     }
5561 
5562     /* pack the outgoing message */
5563     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5564 
5565     sstartsj[0] = 0;
5566     rstartsj[0] = 0;
5567     len         = 0; /* total length of j or a array to be sent */
5568     if (nsends) {
5569       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5570       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5571     }
5572     for (i=0; i<nsends; i++) {
5573       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5574       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5575       for (j=0; j<nrows; j++) {
5576         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5577         for (l=0; l<sbs; l++) {
5578           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5579 
5580           rowlen[j*sbs+l] = ncols;
5581 
5582           len += ncols;
5583           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5584         }
5585         k++;
5586       }
5587       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5588 
5589       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5590     }
5591     /* recvs and sends of i-array are completed */
5592     i = nrecvs;
5593     while (i--) {
5594       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5595     }
5596     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5597     ierr = PetscFree(svalues);CHKERRQ(ierr);
5598 
5599     /* allocate buffers for sending j and a arrays */
5600     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5601     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5602 
5603     /* create i-array of B_oth */
5604     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5605 
5606     b_othi[0] = 0;
5607     len       = 0; /* total length of j or a array to be received */
5608     k         = 0;
5609     for (i=0; i<nrecvs; i++) {
5610       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5611       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5612       for (j=0; j<nrows; j++) {
5613         b_othi[k+1] = b_othi[k] + rowlen[j];
5614         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5615         k++;
5616       }
5617       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5618     }
5619     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5620 
5621     /* allocate space for j and a arrays of B_oth */
5622     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5623     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5624 
5625     /* j-array */
5626     /*---------*/
5627     /*  post receives of j-array */
5628     for (i=0; i<nrecvs; i++) {
5629       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5630       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5631     }
5632 
5633     /* pack the outgoing message j-array */
5634     if (nsends) k = sstarts[0];
5635     for (i=0; i<nsends; i++) {
5636       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5637       bufJ  = bufj+sstartsj[i];
5638       for (j=0; j<nrows; j++) {
5639         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5640         for (ll=0; ll<sbs; ll++) {
5641           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5642           for (l=0; l<ncols; l++) {
5643             *bufJ++ = cols[l];
5644           }
5645           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5646         }
5647       }
5648       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5649     }
5650 
5651     /* recvs and sends of j-array are completed */
5652     i = nrecvs;
5653     while (i--) {
5654       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5655     }
5656     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5657   } else if (scall == MAT_REUSE_MATRIX) {
5658     sstartsj = *startsj_s;
5659     rstartsj = *startsj_r;
5660     bufa     = *bufa_ptr;
5661     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5662     b_otha   = b_oth->a;
5663   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5664 
5665   /* a-array */
5666   /*---------*/
5667   /*  post receives of a-array */
5668   for (i=0; i<nrecvs; i++) {
5669     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5670     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5671   }
5672 
5673   /* pack the outgoing message a-array */
5674   if (nsends) k = sstarts[0];
5675   for (i=0; i<nsends; i++) {
5676     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5677     bufA  = bufa+sstartsj[i];
5678     for (j=0; j<nrows; j++) {
5679       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5680       for (ll=0; ll<sbs; ll++) {
5681         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5682         for (l=0; l<ncols; l++) {
5683           *bufA++ = vals[l];
5684         }
5685         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5686       }
5687     }
5688     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5689   }
5690   /* recvs and sends of a-array are completed */
5691   i = nrecvs;
5692   while (i--) {
5693     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5694   }
5695   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5696   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5697 
5698   if (scall == MAT_INITIAL_MATRIX) {
5699     /* put together the new matrix */
5700     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5701 
5702     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5703     /* Since these are PETSc arrays, change flags to free them as necessary. */
5704     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5705     b_oth->free_a  = PETSC_TRUE;
5706     b_oth->free_ij = PETSC_TRUE;
5707     b_oth->nonew   = 0;
5708 
5709     ierr = PetscFree(bufj);CHKERRQ(ierr);
5710     if (!startsj_s || !bufa_ptr) {
5711       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5712       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5713     } else {
5714       *startsj_s = sstartsj;
5715       *startsj_r = rstartsj;
5716       *bufa_ptr  = bufa;
5717     }
5718   }
5719 
5720   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5721   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5722   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5723   PetscFunctionReturn(0);
5724 }
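
/*
   A minimal usage sketch (hypothetical names): the startsj/bufa buffers produced by the
   initial call are handed back on reuse so that only the matrix values are re-communicated.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       Both = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     (update the values of B, then)
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&Both);CHKERRQ(ierr);
*/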
5725 
5726 /*@C
5727   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5728 
5729   Not Collective
5730 
5731   Input Parameters:
5732 . A - The matrix in mpiaij format
5733 
5734   Output Parameter:
5735 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5736 . colmap - A map from global column index to local index into lvec
5737 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5738 
5739   Level: developer
5740 
5741 @*/
5742 #if defined(PETSC_USE_CTABLE)
5743 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5744 #else
5745 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5746 #endif
5747 {
5748   Mat_MPIAIJ *a;
5749 
5750   PetscFunctionBegin;
5751   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5752   PetscValidPointer(lvec, 2);
5753   PetscValidPointer(colmap, 3);
5754   PetscValidPointer(multScatter, 4);
5755   a = (Mat_MPIAIJ*) A->data;
5756   if (lvec) *lvec = a->lvec;
5757   if (colmap) *colmap = a->colmap;
5758   if (multScatter) *multScatter = a->Mvctx;
5759   PetscFunctionReturn(0);
5760 }
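
/*
   A minimal usage sketch (hypothetical variable names). The returned objects are the
   matrix's internal members, so the caller must not destroy them.

     Vec        lvec;
     VecScatter sct;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
*/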
5761 
5762 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5763 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5764 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5765 #if defined(PETSC_HAVE_MKL_SPARSE)
5766 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5767 #endif
5768 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5769 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5770 #if defined(PETSC_HAVE_ELEMENTAL)
5771 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5772 #endif
5773 #if defined(PETSC_HAVE_HYPRE)
5774 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5775 #endif
5776 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5777 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5778 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5779 
5780 /*
5781     Computes (B'*A')' since computing B*A directly is untenable
5782 
5783                n                       p                          p
5784         (              )       (              )         (                  )
5785       m (      A       )  *  n (       B      )   =   m (         C        )
5786         (              )       (              )         (                  )
5787 
5788 */
5789 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5790 {
5791   PetscErrorCode ierr;
5792   Mat            At,Bt,Ct;
5793 
5794   PetscFunctionBegin;
5795   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5796   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5797   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5798   ierr = MatDestroy(&At);CHKERRQ(ierr);
5799   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5800   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5801   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5802   PetscFunctionReturn(0);
5803 }
5804 
5805 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5806 {
5807   PetscErrorCode ierr;
5808   PetscInt       m=A->rmap->n,n=B->cmap->n;
5809 
5810   PetscFunctionBegin;
5811   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5812   ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5813   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5814   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5815   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
5816   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5817   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5818 
5819   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5820   PetscFunctionReturn(0);
5821 }
5822 
5823 /* ----------------------------------------------------------------*/
5824 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5825 {
5826   Mat_Product *product = C->product;
5827   Mat         A = product->A,B=product->B;
5828 
5829   PetscFunctionBegin;
5830   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5831     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5832 
5833   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5834   C->ops->productsymbolic = MatProductSymbolic_AB;
5835   PetscFunctionReturn(0);
5836 }
5837 
5838 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5839 {
5840   PetscErrorCode ierr;
5841   Mat_Product    *product = C->product;
5842 
5843   PetscFunctionBegin;
5844   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5845   if (product->type == MATPRODUCT_AB) {
5846     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5847   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
5848   PetscFunctionReturn(0);
5849 }
5850 /* ----------------------------------------------------------------*/
5851 
5852 /*MC
5853    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5854 
5855    Options Database Keys:
5856 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5857 
5858    Level: beginner
5859 
5860    Notes:
5861     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5862     in this case the values associated with the rows and columns one passes in are set to zero
5863     in the matrix.
5864 
5865     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5866     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5867 
5868 .seealso: MatCreateAIJ()
5869 M*/
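
/*
   A minimal sketch of creating a MATMPIAIJ matrix explicitly by type name (the sizes
   m, n, M, N and the preallocation counts below are illustrative placeholders):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/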
5870 
5871 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5872 {
5873   Mat_MPIAIJ     *b;
5874   PetscErrorCode ierr;
5875   PetscMPIInt    size;
5876 
5877   PetscFunctionBegin;
5878   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5879 
5880   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5881   B->data       = (void*)b;
5882   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5883   B->assembled  = PETSC_FALSE;
5884   B->insertmode = NOT_SET_VALUES;
5885   b->size       = size;
5886 
5887   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5888 
5889   /* build cache for off array entries formed */
5890   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5891 
5892   b->donotstash  = PETSC_FALSE;
5893   b->colmap      = 0;
5894   b->garray      = 0;
5895   b->roworiented = PETSC_TRUE;
5896 
5897   /* stuff used for matrix vector multiply */
5898   b->lvec  = NULL;
5899   b->Mvctx = NULL;
5900 
5901   /* stuff for MatGetRow() */
5902   b->rowindices   = 0;
5903   b->rowvalues    = 0;
5904   b->getrowactive = PETSC_FALSE;
5905 
5906   /* flexible pointer used in CUSP/CUSPARSE classes */
5907   b->spptr = NULL;
5908 
5909   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5910   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5911   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5912   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5913   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5914   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5915   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5916   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5917   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5918   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5919 #if defined(PETSC_HAVE_MKL_SPARSE)
5920   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5921 #endif
5922   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5923   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5924   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5925 #if defined(PETSC_HAVE_ELEMENTAL)
5926   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5927 #endif
5928   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5929   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5930   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5931   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5932 #if defined(PETSC_HAVE_HYPRE)
5933   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5934   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5935 #endif
5936   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5937   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5938   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5939   PetscFunctionReturn(0);
5940 }
5941 
5942 /*@C
5943      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5944          and "off-diagonal" part of the matrix in CSR format.
5945 
5946    Collective
5947 
5948    Input Parameters:
5949 +  comm - MPI communicator
5950 .  m - number of local rows (Cannot be PETSC_DECIDE)
5951 .  n - This value should be the same as the local size used in creating the
5952        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5953        calculated if N is given). For square matrices n is almost always m.
5954 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5955 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5956 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5957 .   j - column indices
5958 .   a - matrix values
5959 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5960 .   oj - column indices
5961 -   oa - matrix values
5962 
5963    Output Parameter:
5964 .   mat - the matrix
5965 
5966    Level: advanced
5967 
5968    Notes:
5969        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5970        must free the arrays once the matrix has been destroyed and not before.
5971 
5972        The i and j indices are 0 based
5973 
5974        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5975 
5976        This sets local rows and cannot be used to set off-processor values.
5977 
5978        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5979        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5980        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5981        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5982        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5983        communication if it is known that only local entries will be set.
5984 
5985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5986           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5987 @*/
5988 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5989 {
5990   PetscErrorCode ierr;
5991   Mat_MPIAIJ     *maij;
5992 
5993   PetscFunctionBegin;
5994   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5995   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5996   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5997   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5998   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5999   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6000   maij = (Mat_MPIAIJ*) (*mat)->data;
6001 
6002   (*mat)->preallocated = PETSC_TRUE;
6003 
6004   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6005   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6006 
6007   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6008   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6009 
6010   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6011   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6012   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6013   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6014 
6015   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6016   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6017   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6018   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6019   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6020   PetscFunctionReturn(0);
6021 }
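
/*
   A minimal sketch (illustrative data for rank 0 of a two-process run, each rank owning
   two rows and two columns): the diagonal part holds one entry per row, and the off-diagonal
   part holds one entry in row 0 at global column 3. The six arrays must outlive the matrix.

     PetscInt    i[]  = {0,1,2}, j[]  = {0,1}; PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1}, oj[] = {3};   PetscScalar oa[] = {4.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/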
6022 
6023 /*
6024     Special version for direct calls from Fortran
6025 */
6026 #include <petsc/private/fortranimpl.h>
6027 
6028 /* Change these macros so they can be used in a void function */
6029 #undef CHKERRQ
6030 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6031 #undef SETERRQ2
6032 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6033 #undef SETERRQ3
6034 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6035 #undef SETERRQ
6036 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6037 
6038 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6039 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6040 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6041 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6042 #else
6043 #endif
6044 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6045 {
6046   Mat            mat  = *mmat;
6047   PetscInt       m    = *mm, n = *mn;
6048   InsertMode     addv = *maddv;
6049   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6050   PetscScalar    value;
6051   PetscErrorCode ierr;
6052 
6053   MatCheckPreallocated(mat,1);
6054   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6055 
6056 #if defined(PETSC_USE_DEBUG)
6057   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6058 #endif
6059   {
6060     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6061     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6062     PetscBool roworiented = aij->roworiented;
6063 
6064     /* Some Variables required in the macro */
6065     Mat        A                    = aij->A;
6066     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6067     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6068     MatScalar  *aa                  = a->a;
6069     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6070     Mat        B                    = aij->B;
6071     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6072     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6073     MatScalar  *ba                  = b->a;
6074     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6075      * cannot use "#if defined" inside a macro. */
6076     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6077 
6078     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6079     PetscInt  nonew = a->nonew;
6080     MatScalar *ap1,*ap2;
6081 
6082     PetscFunctionBegin;
6083     for (i=0; i<m; i++) {
6084       if (im[i] < 0) continue;
6085 #if defined(PETSC_USE_DEBUG)
6086       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6087 #endif
6088       if (im[i] >= rstart && im[i] < rend) {
6089         row      = im[i] - rstart;
6090         lastcol1 = -1;
6091         rp1      = aj + ai[row];
6092         ap1      = aa + ai[row];
6093         rmax1    = aimax[row];
6094         nrow1    = ailen[row];
6095         low1     = 0;
6096         high1    = nrow1;
6097         lastcol2 = -1;
6098         rp2      = bj + bi[row];
6099         ap2      = ba + bi[row];
6100         rmax2    = bimax[row];
6101         nrow2    = bilen[row];
6102         low2     = 0;
6103         high2    = nrow2;
6104 
6105         for (j=0; j<n; j++) {
6106           if (roworiented) value = v[i*n+j];
6107           else value = v[i+j*m];
6108           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6109           if (in[j] >= cstart && in[j] < cend) {
6110             col = in[j] - cstart;
6111             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6112 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6113             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6114 #endif
6115           } else if (in[j] < 0) continue;
6116 #if defined(PETSC_USE_DEBUG)
6117           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6118           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6119 #endif
6120           else {
6121             if (mat->was_assembled) {
6122               if (!aij->colmap) {
6123                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6124               }
6125 #if defined(PETSC_USE_CTABLE)
6126               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6127               col--;
6128 #else
6129               col = aij->colmap[in[j]] - 1;
6130 #endif
6131               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6132                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6133                 col  =  in[j];
6134                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6135                 B        = aij->B;
6136                 b        = (Mat_SeqAIJ*)B->data;
6137                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6138                 rp2      = bj + bi[row];
6139                 ap2      = ba + bi[row];
6140                 rmax2    = bimax[row];
6141                 nrow2    = bilen[row];
6142                 low2     = 0;
6143                 high2    = nrow2;
6144                 bm       = aij->B->rmap->n;
6145                 ba       = b->a;
6146                 inserted = PETSC_FALSE;
6147               }
6148             } else col = in[j];
6149             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6150 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6151             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6152 #endif
6153           }
6154         }
6155       } else if (!aij->donotstash) {
6156         if (roworiented) {
6157           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6158         } else {
6159           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6160         }
6161       }
6162     }
6163   }
6164   PetscFunctionReturnVoid();
6165 }
6166