xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1a25486991308d78e6a2b51cebb4dae069e468b1)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. This type also automatically switches over to using inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
/*
   MatDistribute_MPIAIJ - Scatters the rows of a sequential AIJ matrix read on
   rank 0 of comm into a parallel matrix.

   Input Parameters:
+  comm  - communicator of the parallel matrix
.  gmat  - the sequential matrix; its type and data are only inspected on rank 0
.  m     - number of rows this process receives (also used as the local column size)
-  reuse - MAT_INITIAL_MATRIX creates *inmat; any other value takes the
           "values only" branch and refills the existing *inmat

   Output Parameter:
.  inmat - the distributed matrix

   In the MAT_INITIAL_MATRIX branch rank 0 sends, in this order, row lengths,
   column indices and numerical values to every other rank using blocking
   sends; the other ranks receive in the same order.  The array ld (one entry
   per local row) is stored on the matrix and is read again by the
   "values only" branch.
*/
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* the type check is performed on rank 0 only */
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* every rank queries gmat, but the values broadcast from rank 0 are the ones used */
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every rank's m into rowners[1..size]; the prefix sum below turns
       rowners[r] into the first global row owned by rank r */
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
      /* rank 0 keeps the lengths of the first m rows for itself */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine number diagonal and off-diagonal counts */
      /* ld[i]    = entries of local row i with column < rstart
         olens[i] = entries of local row i with column outside [rstart,rend) */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
      /* rank 0 inserts its own rows straight from gmat's arrays (nothing to free later) */
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine number diagonal and off-diagonal counts */
      /* same meaning of ld[] and olens[] as on rank 0 */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
    /* dlens[] holds full row lengths; subtract the off-diagonal part to get
       the diagonal-block counts needed for preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
    /* restore full row lengths: they are the per-row entry counts passed to MatSetValues() */
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
    /* keep ld on the matrix; the "values only" branch below reads it back */
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0*/
390       nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced while copying below, so remember the allocation in gmataarestore for PetscFree() */
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* gmataa is consumed row by row as: ld[i] off-diagonal values, then the
       diagonal-block values, then the remaining off-diagonal values.  The
       trailing off-diagonal run of row i-1 and the leading run of row i are
       adjacent in both gmataa and Ao->a, so the loop copies them together.
       (This relies on the incoming values being ordered by column within a
       row - presumably guaranteed by gmat; confirm.) */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
    /* step back to the last local row and copy its trailing off-diagonal values */
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it it is not scalable (each processor
425 has an order N integer array but is fast to acess.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at local
   (row,col) of the diagonal block.  orow/ocol are only used in the error
   message.

   The macro is not self-contained: it reads and updates variables of the
   enclosing function (rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, N,
   nonew, ignorezeroentries, inserted, ierr, a, A, am, aa, ai, aj, aimax,
   ailen) and defines the label a_noinsert, so it can be expanded only once
   per function.

   Steps:
   - narrow the search window [low1,high1) in the row's column indices rp1 by
     bisection until at most 5 candidates remain, then scan linearly; the
     window from the previous call is reused when col > lastcol1 (the search
     relies on rp1 being in ascending order);
   - if the column is already stored, add to or overwrite the value, set
     inserted = PETSC_TRUE and finish;
   - otherwise skip the entry when it is an off-diagonal zero and
     ignorezeroentries is set, or when nonew == 1; raise an error when
     nonew == -1;
   - otherwise call MatSeqXAIJReallocateAIJ() (defined elsewhere; presumably
     grows the row storage when it is full - confirm), shift the later entries
     of the row up by one, store the new column/value and bump
     A->nonzerostate;
   - in every case write the row length back to ailen[row].
*/
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure LogFlops will slow dow the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
/*
   MatSetValues_SeqAIJ_B_Private - Counterpart of
   MatSetValues_SeqAIJ_A_Private for the off-diagonal block: inserts or adds
   one value at (row,col) of B.  orow/ocol are only used in the error message.

   It works on the enclosing function's variables rp2, ap2, nrow2, rmax2, low2,
   high2, lastcol2, t, _i, N, nonew, ignorezeroentries, inserted, ierr, b, B,
   bm, ba, bi, bj, bimax, bilen and defines the label b_noinsert.

   The search, update and insertion steps mirror the A variant.  The one
   difference visible in the code is the zero-skip test: a zero value is
   skipped whenever ignorezeroentries is set, without the row != col
   exception used for the diagonal block.
*/
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values into the
   parallel matrix.

   Input Parameters:
+  mat  - the matrix
.  m,im - number of rows and their global indices; negative indices are skipped
.  n,in - number of columns and their global indices; negative indices are skipped
.  v    - the values, read as v[i*n+j] when the matrix is row oriented and as
          v[i+j*m] otherwise; when v is NULL the value 0.0 is used
-  addv - ADD_VALUES or INSERT_VALUES

   Locally owned rows are handled immediately: columns inside
   [cstart,cend) go to the diagonal block through
   MatSetValues_SeqAIJ_A_Private, all other columns go to the off-diagonal
   block through MatSetValues_SeqAIJ_B_Private.  Rows owned by another process
   are put into the stash unless donotstash is set; it is an error to pass
   such a row when mat->nooffprocentries is set.
*/
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by both macros */
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* when adding, a zero off the global diagonal changes nothing and is skipped if requested */
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
          /* diagonal block: local column index */
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
          /* off-diagonal block: once the matrix has been assembled the global
             column is translated through colmap (built on first use); before
             that the global column index is used directly */
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
            /* col < 0: the column is not present in B yet */
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new nonzeros are allowed: disassemble and continue with the global column index */
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
              /* new nonzeros are not allowed: report and let the macro's nonew == 1 test skip the entry, or fail */
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
      /* row owned by another process: stash the whole row (or column-strided slice) */
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled, if so we must
857      also disassemble ourself, in order that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = NULL;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of off process part of matrix zeroing removed columns*/
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented code uses MatCreateSubMatrices instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Everyone has to call to draw the matrix since the graphics waits are
1495        synchronized across all processors that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = NULL;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1711       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = NULL; pvB = NULL;}
1874   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Sort by increasing column numbers, assuming A and B already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = NULL;
1912       if (v)   *v   = NULL;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2060   very quickly (=without using MatSetValues), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2106   }
2107   /* scale  the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may be different, hence we cannot call
2158        the MatCopy() directly on the two parts. If need be, we can provide a more
2159        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2160        then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308   PetscInt       i,*idxb = NULL;
2309   PetscScalar    *va,*vb;
2310   Vec            vtmp;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2314   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2315   if (idx) {
2316     for (i=0; i<A->rmap->n; i++) {
2317       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2318     }
2319   }
2320 
2321   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2322   if (idx) {
2323     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2324   }
2325   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2326   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2327 
2328   for (i=0; i<A->rmap->n; i++) {
2329     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2330       va[i] = vb[i];
2331       if (idx) idx[i] = a->garray[idxb[i]];
2332     }
2333   }
2334 
2335   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2337   ierr = PetscFree(idxb);CHKERRQ(ierr);
2338   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2345   PetscErrorCode ierr;
2346   PetscInt       i,*idxb = NULL;
2347   PetscScalar    *va,*vb;
2348   Vec            vtmp;
2349 
2350   PetscFunctionBegin;
2351   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2352   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2353   if (idx) {
2354     for (i=0; i<A->cmap->n; i++) {
2355       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2356     }
2357   }
2358 
2359   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2360   if (idx) {
2361     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2362   }
2363   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2364   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2365 
2366   for (i=0; i<A->rmap->n; i++) {
2367     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2368       va[i] = vb[i];
2369       if (idx) idx[i] = a->garray[idxb[i]];
2370     }
2371   }
2372 
2373   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2375   ierr = PetscFree(idxb);CHKERRQ(ierr);
2376   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2381 {
2382   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2383   PetscInt       n      = A->rmap->n;
2384   PetscInt       cstart = A->cmap->rstart;
2385   PetscInt       *cmap  = mat->garray;
2386   PetscInt       *diagIdx, *offdiagIdx;
2387   Vec            diagV, offdiagV;
2388   PetscScalar    *a, *diagA, *offdiagA;
2389   PetscInt       r;
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2394   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2395   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2396   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2397   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2398   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2399   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2400   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2401   for (r = 0; r < n; ++r) {
2402     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2403       a[r]   = diagA[r];
2404       idx[r] = cstart + diagIdx[r];
2405     } else {
2406       a[r]   = offdiagA[r];
2407       idx[r] = cmap[offdiagIdx[r]];
2408     }
2409   }
2410   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2412   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2413   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2414   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2415   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
/*
   MatGetRowMax_MPIAIJ - For every local row, returns in v the maximum entry (compared by real
   part) and, when idx is not NULL, its global column number.

   Input:
     A   - the MPIAIJ matrix
   Output:
     v   - one value per local row
     idx - optional (may be NULL); one column index per local row

   Three cases are handled:
     - this process owns all columns (A->cmap->N == n): the result is that of the diagonal
       block alone, computed directly into the array of v;
     - this process owns no columns (n == 0): every local row gets PETSC_MIN_REAL and index -1;
     - otherwise: the diagonal-block maxima come from MatGetRowMax(mat->A,...), the
       off-diagonal maxima are computed here from the CSR arrays of mat->B, and the two are
       merged. On equal real parts the smaller global column index is reported.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
  PetscInt       m = A->rmap->n,n = A->cmap->n;
  PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA, *ba;
  PetscInt       r,j,col,ncols,*bi,*bj;
  PetscErrorCode ierr;
  Mat            B = mat->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Wrap the array of v in a sequential vector so the diagonal block writes into it directly */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* No locally owned columns: report PETSC_MIN_REAL with column index -1 for each local row */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  /* Work storage: per-row maxima and column indices of the diagonal and off-diagonal parts */
  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  /* ba and bj are advanced entry by entry in the scan at the end of each iteration below, so at
     the top of iteration r they point at the first stored entry of row r */
  ba = b->a;
  bi = b->i;
  bj = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      /* every off-process column is stored: start from the first stored value */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      /* no hole was found among the stored entries and B has fewer (compressed) columns than
         there are off-process columns */
      if (j == ncols && B->cmap->N < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first off-process column is 0, or cend when the owned range starts at 0 */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          /* skip over the locally owned column range */
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the stored entries of this row; a strictly larger real part replaces the candidate */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge the diagonal and off-diagonal results row by row */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2524 
2525 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2526 {
2527   PetscErrorCode ierr;
2528   Mat            *dummy;
2529 
2530   PetscFunctionBegin;
2531   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2532   *newmat = *dummy;
2533   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2538 {
2539   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2540   PetscErrorCode ierr;
2541 
2542   PetscFunctionBegin;
2543   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2544   A->factorerrortype = a->A->factorerrortype;
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2549 {
2550   PetscErrorCode ierr;
2551   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2552 
2553   PetscFunctionBegin;
2554   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2555   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2556   if (x->assembled) {
2557     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2558   } else {
2559     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2560   }
2561   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2562   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2567 {
2568   PetscFunctionBegin;
2569   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2570   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 /*@
2575    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2576 
2577    Collective on Mat
2578 
2579    Input Parameters:
2580 +    A - the matrix
2581 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2582 
2583  Level: advanced
2584 
2585 @*/
2586 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2587 {
2588   PetscErrorCode       ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2596 {
2597   PetscErrorCode       ierr;
2598   PetscBool            sc = PETSC_FALSE,flg;
2599 
2600   PetscFunctionBegin;
2601   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2602   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2603   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2604   if (flg) {
2605     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2606   }
2607   ierr = PetscOptionsTail();CHKERRQ(ierr);
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2612 {
2613   PetscErrorCode ierr;
2614   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2615   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2616 
2617   PetscFunctionBegin;
2618   if (!Y->preallocated) {
2619     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2620   } else if (!aij->nz) {
2621     PetscInt nonew = aij->nonew;
2622     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2623     aij->nonew = nonew;
2624   }
2625   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2626   PetscFunctionReturn(0);
2627 }
2628 
2629 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2630 {
2631   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2632   PetscErrorCode ierr;
2633 
2634   PetscFunctionBegin;
2635   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2636   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2637   if (d) {
2638     PetscInt rstart;
2639     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2640     *d += rstart;
2641 
2642   }
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2647 {
2648   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 /* -------------------------------------------------------------------*/
/*
   Table of matrix operations for the MPIAIJ implementation.

   The initializer is positional: each entry fills the next member of struct _MatOps, so the
   order of the lines must not change. The numeric markers give the zero-based slot index of
   the entry on that line. A NULL entry means this table supplies no function for that slot.
   NOTE(review): presumably copied into the ops of a matrix when its type is set to MATMPIAIJ;
   the place where the table is installed is not visible here - confirm.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2806 
2807 /* ----------------------------------------------------------------------------------------*/
2808 
2809 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2810 {
2811   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2816   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2821 {
2822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2823   PetscErrorCode ierr;
2824 
2825   PetscFunctionBegin;
2826   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2827   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2832 {
2833   Mat_MPIAIJ     *b;
2834   PetscErrorCode ierr;
2835   PetscMPIInt    size;
2836 
2837   PetscFunctionBegin;
2838   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2839   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2840   b = (Mat_MPIAIJ*)B->data;
2841 
2842 #if defined(PETSC_USE_CTABLE)
2843   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2844 #else
2845   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2846 #endif
2847   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2848   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2849   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2850 
2851   /* Because the B will have been resized we simply destroy it and create a new one each time */
2852   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2853   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2854   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2855   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2856   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2857   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2859 
2860   if (!B->preallocated) {
2861     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2862     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2863     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2864     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2865     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2866   }
2867 
2868   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2869   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2870   B->preallocated  = PETSC_TRUE;
2871   B->was_assembled = PETSC_FALSE;
2872   B->assembled     = PETSC_FALSE;
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2877 {
2878   Mat_MPIAIJ     *b;
2879   PetscErrorCode ierr;
2880 
2881   PetscFunctionBegin;
2882   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2883   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2884   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2885   b = (Mat_MPIAIJ*)B->data;
2886 
2887 #if defined(PETSC_USE_CTABLE)
2888   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2889 #else
2890   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2891 #endif
2892   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2893   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2894   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2895 
2896   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2897   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2898   B->preallocated  = PETSC_TRUE;
2899   B->was_assembled = PETSC_FALSE;
2900   B->assembled = PETSC_FALSE;
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2905 {
2906   Mat            mat;
2907   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   *newmat = NULL;
2912   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2913   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2914   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2915   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2916   a       = (Mat_MPIAIJ*)mat->data;
2917 
2918   mat->factortype   = matin->factortype;
2919   mat->assembled    = matin->assembled;
2920   mat->insertmode   = NOT_SET_VALUES;
2921   mat->preallocated = matin->preallocated;
2922 
2923   a->size         = oldmat->size;
2924   a->rank         = oldmat->rank;
2925   a->donotstash   = oldmat->donotstash;
2926   a->roworiented  = oldmat->roworiented;
2927   a->rowindices   = NULL;
2928   a->rowvalues    = NULL;
2929   a->getrowactive = PETSC_FALSE;
2930 
2931   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2932   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2933 
2934   if (oldmat->colmap) {
2935 #if defined(PETSC_USE_CTABLE)
2936     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2937 #else
2938     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2939     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2940     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2941 #endif
2942   } else a->colmap = NULL;
2943   if (oldmat->garray) {
2944     PetscInt len;
2945     len  = oldmat->B->cmap->n;
2946     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2947     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2948     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2949   } else a->garray = NULL;
2950 
2951   /* It may happen MatDuplicate is called with a non-assembled matrix
2952      In fact, MatDuplicate only requires the matrix to be preallocated
2953      This may happen inside a DMCreateMatrix_Shell */
2954   if (oldmat->lvec) {
2955     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2956     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2957   }
2958   if (oldmat->Mvctx) {
2959     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2960     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2961   }
2962   if (oldmat->Mvctx_mpi1) {
2963     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2964     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2965   }
2966 
2967   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2968   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2969   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2970   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2971   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2972   *newmat = mat;
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2977 {
2978   PetscBool      isbinary, ishdf5;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2983   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2984   /* force binary viewer to load .info file if it has not yet done so */
2985   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2986   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2987   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2988   if (isbinary) {
2989     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2990   } else if (ishdf5) {
2991 #if defined(PETSC_HAVE_HDF5)
2992     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2993 #else
2994     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2995 #endif
2996   } else {
2997     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2998   }
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3003 {
3004   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3005   PetscInt       *rowidxs,*colidxs;
3006   PetscScalar    *matvals;
3007   PetscErrorCode ierr;
3008 
3009   PetscFunctionBegin;
3010   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3011 
3012   /* read in matrix header */
3013   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3014   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3015   M  = header[1]; N = header[2]; nz = header[3];
3016   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3017   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3018   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3019 
3020   /* set block sizes from the viewer's .info file */
3021   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3022   /* set global sizes if not set already */
3023   if (mat->rmap->N < 0) mat->rmap->N = M;
3024   if (mat->cmap->N < 0) mat->cmap->N = N;
3025   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3027 
3028   /* check if the matrix sizes are correct */
3029   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3030   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3031 
3032   /* read in row lengths and build row indices */
3033   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3034   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3035   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3036   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3037   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3038   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3039   /* read in column indices and matrix values */
3040   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3041   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3042   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3043   /* store matrix indices and values */
3044   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3045   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3046   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3047   PetscFunctionReturn(0);
3048 }
3049 
3050 /* Not scalable because of ISAllGather() unless getting all columns. */
3051 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3052 {
3053   PetscErrorCode ierr;
3054   IS             iscol_local;
3055   PetscBool      isstride;
3056   PetscMPIInt    lisstride=0,gisstride;
3057 
3058   PetscFunctionBegin;
3059   /* check if we are grabbing all columns*/
3060   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3061 
3062   if (isstride) {
3063     PetscInt  start,len,mstart,mlen;
3064     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3065     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3066     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3067     if (mstart == start && mlen-mstart == len) lisstride = 1;
3068   }
3069 
3070   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3071   if (gisstride) {
3072     PetscInt N;
3073     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3074     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3075     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3076     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3077   } else {
3078     PetscInt cbs;
3079     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3080     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3081     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3082   }
3083 
3084   *isseq = iscol_local;
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 /*
3089  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3090  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3091 
3092  Input Parameters:
3093    mat - matrix
3094    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3095            i.e., mat->rstart <= isrow[i] < mat->rend
3096    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3097            i.e., mat->cstart <= iscol[i] < mat->cend
3098  Output Parameter:
3099    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3100    iscol_o - sequential column index set for retrieving mat->B
3101    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3102  */
3103 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3104 {
3105   PetscErrorCode ierr;
3106   Vec            x,cmap;
3107   const PetscInt *is_idx;
3108   PetscScalar    *xarray,*cmaparray;
3109   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3110   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3111   Mat            B=a->B;
3112   Vec            lvec=a->lvec,lcmap;
3113   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3114   MPI_Comm       comm;
3115   VecScatter     Mvctx=a->Mvctx;
3116 
3117   PetscFunctionBegin;
3118   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3119   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3120 
3121   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3122   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3123   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3124   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3125   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3126 
3127   /* Get start indices */
3128   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3129   isstart -= ncols;
3130   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3131 
3132   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3133   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3134   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3135   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3136   for (i=0; i<ncols; i++) {
3137     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3138     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3139     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3140   }
3141   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3142   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3143   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3144 
3145   /* Get iscol_d */
3146   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3147   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3148   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3149 
3150   /* Get isrow_d */
3151   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3152   rstart = mat->rmap->rstart;
3153   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3154   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3155   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3156   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3157 
3158   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3159   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3160   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3161 
3162   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3163   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3164   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3165 
3166   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3167 
3168   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3169   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3170 
3171   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3172   /* off-process column indices */
3173   count = 0;
3174   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3175   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3176 
3177   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3178   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3179   for (i=0; i<Bn; i++) {
3180     if (PetscRealPart(xarray[i]) > -1.0) {
3181       idx[count]     = i;                   /* local column index in off-diagonal part B */
3182       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3183       count++;
3184     }
3185   }
3186   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3187   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3188 
3189   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3190   /* cannot ensure iscol_o has same blocksize as iscol! */
3191 
3192   ierr = PetscFree(idx);CHKERRQ(ierr);
3193   *garray = cmap1;
3194 
3195   ierr = VecDestroy(&x);CHKERRQ(ierr);
3196   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3197   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3202 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3203 {
3204   PetscErrorCode ierr;
3205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3206   Mat            M = NULL;
3207   MPI_Comm       comm;
3208   IS             iscol_d,isrow_d,iscol_o;
3209   Mat            Asub = NULL,Bsub = NULL;
3210   PetscInt       n;
3211 
3212   PetscFunctionBegin;
3213   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3214 
3215   if (call == MAT_REUSE_MATRIX) {
3216     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3217     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3218     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3219 
3220     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3221     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3222 
3223     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3224     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3225 
3226     /* Update diagonal and off-diagonal portions of submat */
3227     asub = (Mat_MPIAIJ*)(*submat)->data;
3228     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3229     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3230     if (n) {
3231       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3232     }
3233     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3234     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3235 
3236   } else { /* call == MAT_INITIAL_MATRIX) */
3237     const PetscInt *garray;
3238     PetscInt        BsubN;
3239 
3240     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3241     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3242 
3243     /* Create local submatrices Asub and Bsub */
3244     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3245     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3246 
3247     /* Create submatrix M */
3248     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3249 
3250     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3251     asub = (Mat_MPIAIJ*)M->data;
3252 
3253     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3254     n = asub->B->cmap->N;
3255     if (BsubN > n) {
3256       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3257       const PetscInt *idx;
3258       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3259       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3260 
3261       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3262       j = 0;
3263       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3264       for (i=0; i<n; i++) {
3265         if (j >= BsubN) break;
3266         while (subgarray[i] > garray[j]) j++;
3267 
3268         if (subgarray[i] == garray[j]) {
3269           idx_new[i] = idx[j++];
3270         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3271       }
3272       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3273 
3274       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3275       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3276 
3277     } else if (BsubN < n) {
3278       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3279     }
3280 
3281     ierr = PetscFree(garray);CHKERRQ(ierr);
3282     *submat = M;
3283 
3284     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3285     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3286     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3287 
3288     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3289     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3290 
3291     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3292     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3293   }
3294   PetscFunctionReturn(0);
3295 }
3296 
3297 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3298 {
3299   PetscErrorCode ierr;
3300   IS             iscol_local=NULL,isrow_d;
3301   PetscInt       csize;
3302   PetscInt       n,i,j,start,end;
3303   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3304   MPI_Comm       comm;
3305 
3306   PetscFunctionBegin;
3307   /* If isrow has same processor distribution as mat,
3308      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3309   if (call == MAT_REUSE_MATRIX) {
3310     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3311     if (isrow_d) {
3312       sameRowDist  = PETSC_TRUE;
3313       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3314     } else {
3315       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3316       if (iscol_local) {
3317         sameRowDist  = PETSC_TRUE;
3318         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3319       }
3320     }
3321   } else {
3322     /* Check if isrow has same processor distribution as mat */
3323     sameDist[0] = PETSC_FALSE;
3324     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3325     if (!n) {
3326       sameDist[0] = PETSC_TRUE;
3327     } else {
3328       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3329       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3330       if (i >= start && j < end) {
3331         sameDist[0] = PETSC_TRUE;
3332       }
3333     }
3334 
3335     /* Check if iscol has same processor distribution as mat */
3336     sameDist[1] = PETSC_FALSE;
3337     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3338     if (!n) {
3339       sameDist[1] = PETSC_TRUE;
3340     } else {
3341       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3342       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3343       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3344     }
3345 
3346     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3347     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3348     sameRowDist = tsameDist[0];
3349   }
3350 
3351   if (sameRowDist) {
3352     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3353       /* isrow and iscol have same processor distribution as mat */
3354       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3355       PetscFunctionReturn(0);
3356     } else { /* sameRowDist */
3357       /* isrow has same processor distribution as mat */
3358       if (call == MAT_INITIAL_MATRIX) {
3359         PetscBool sorted;
3360         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3361         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3362         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3363         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3364 
3365         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3366         if (sorted) {
3367           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3368           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3369           PetscFunctionReturn(0);
3370         }
3371       } else { /* call == MAT_REUSE_MATRIX */
3372         IS    iscol_sub;
3373         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3374         if (iscol_sub) {
3375           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3376           PetscFunctionReturn(0);
3377         }
3378       }
3379     }
3380   }
3381 
3382   /* General case: iscol -> iscol_local which has global size of iscol */
3383   if (call == MAT_REUSE_MATRIX) {
3384     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3385     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3386   } else {
3387     if (!iscol_local) {
3388       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3389     }
3390   }
3391 
3392   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3393   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3394 
3395   if (call == MAT_INITIAL_MATRIX) {
3396     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3397     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3398   }
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 /*@C
3403      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3404          and "off-diagonal" part of the matrix in CSR format.
3405 
3406    Collective
3407 
3408    Input Parameters:
3409 +  comm - MPI communicator
3410 .  A - "diagonal" portion of matrix
3411 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3412 -  garray - global index of B columns
3413 
3414    Output Parameter:
3415 .   mat - the matrix, with input A as its local diagonal matrix
3416    Level: advanced
3417 
3418    Notes:
3419        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3420        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3421 
3422 .seealso: MatCreateMPIAIJWithSplitArrays()
3423 @*/
3424 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3425 {
3426   PetscErrorCode ierr;
3427   Mat_MPIAIJ     *maij;
3428   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3429   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3430   PetscScalar    *oa=b->a;
3431   Mat            Bnew;
3432   PetscInt       m,n,N;
3433 
3434   PetscFunctionBegin;
3435   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3436   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3437   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3438   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3439   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3440   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3441 
3442   /* Get global columns of mat */
3443   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3444 
3445   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3446   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3447   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3448   maij = (Mat_MPIAIJ*)(*mat)->data;
3449 
3450   (*mat)->preallocated = PETSC_TRUE;
3451 
3452   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3453   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3454 
3455   /* Set A as diagonal portion of *mat */
3456   maij->A = A;
3457 
3458   nz = oi[m];
3459   for (i=0; i<nz; i++) {
3460     col   = oj[i];
3461     oj[i] = garray[col];
3462   }
3463 
3464    /* Set Bnew as off-diagonal portion of *mat */
3465   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3466   bnew        = (Mat_SeqAIJ*)Bnew->data;
3467   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3468   maij->B     = Bnew;
3469 
3470   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3471 
3472   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3473   b->free_a       = PETSC_FALSE;
3474   b->free_ij      = PETSC_FALSE;
3475   ierr = MatDestroy(&B);CHKERRQ(ierr);
3476 
3477   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3478   bnew->free_a       = PETSC_TRUE;
3479   bnew->free_ij      = PETSC_TRUE;
3480 
3481   /* condense columns of maij->B */
3482   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3483   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3484   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3485   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3486   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3487   PetscFunctionReturn(0);
3488 }
3489 
/* Forward declaration; the definition is not in this part of the file. Called by MatCreateSubMatrix_MPIAIJ_SameRowDist() below. */
extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3491 
3492 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3493 {
3494   PetscErrorCode ierr;
3495   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3496   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3497   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3498   Mat            M,Msub,B=a->B;
3499   MatScalar      *aa;
3500   Mat_SeqAIJ     *aij;
3501   PetscInt       *garray = a->garray,*colsub,Ncols;
3502   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3503   IS             iscol_sub,iscmap;
3504   const PetscInt *is_idx,*cmap;
3505   PetscBool      allcolumns=PETSC_FALSE;
3506   MPI_Comm       comm;
3507 
3508   PetscFunctionBegin;
3509   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3510 
3511   if (call == MAT_REUSE_MATRIX) {
3512     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3513     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3514     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3515 
3516     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3517     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3518 
3519     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3520     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3521 
3522     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3523 
3524   } else { /* call == MAT_INITIAL_MATRIX) */
3525     PetscBool flg;
3526 
3527     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3528     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3529 
3530     /* (1) iscol -> nonscalable iscol_local */
3531     /* Check for special case: each processor gets entire matrix columns */
3532     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3533     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3534     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3535     if (allcolumns) {
3536       iscol_sub = iscol_local;
3537       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3538       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3539 
3540     } else {
3541       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3542       PetscInt *idx,*cmap1,k;
3543       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3544       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3545       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3546       count = 0;
3547       k     = 0;
3548       for (i=0; i<Ncols; i++) {
3549         j = is_idx[i];
3550         if (j >= cstart && j < cend) {
3551           /* diagonal part of mat */
3552           idx[count]     = j;
3553           cmap1[count++] = i; /* column index in submat */
3554         } else if (Bn) {
3555           /* off-diagonal part of mat */
3556           if (j == garray[k]) {
3557             idx[count]     = j;
3558             cmap1[count++] = i;  /* column index in submat */
3559           } else if (j > garray[k]) {
3560             while (j > garray[k] && k < Bn-1) k++;
3561             if (j == garray[k]) {
3562               idx[count]     = j;
3563               cmap1[count++] = i; /* column index in submat */
3564             }
3565           }
3566         }
3567       }
3568       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3569 
3570       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3571       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3572       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3573 
3574       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3575     }
3576 
3577     /* (3) Create sequential Msub */
3578     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3579   }
3580 
3581   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3582   aij  = (Mat_SeqAIJ*)(Msub)->data;
3583   ii   = aij->i;
3584   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3585 
3586   /*
3587       m - number of local rows
3588       Ncols - number of columns (same on all processors)
3589       rstart - first row in new global matrix generated
3590   */
3591   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3592 
3593   if (call == MAT_INITIAL_MATRIX) {
3594     /* (4) Create parallel newmat */
3595     PetscMPIInt    rank,size;
3596     PetscInt       csize;
3597 
3598     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3599     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3600 
3601     /*
3602         Determine the number of non-zeros in the diagonal and off-diagonal
3603         portions of the matrix in order to do correct preallocation
3604     */
3605 
3606     /* first get start and end of "diagonal" columns */
3607     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3608     if (csize == PETSC_DECIDE) {
3609       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3610       if (mglobal == Ncols) { /* square matrix */
3611         nlocal = m;
3612       } else {
3613         nlocal = Ncols/size + ((Ncols % size) > rank);
3614       }
3615     } else {
3616       nlocal = csize;
3617     }
3618     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3619     rstart = rend - nlocal;
3620     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3621 
3622     /* next, compute all the lengths */
3623     jj    = aij->j;
3624     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3625     olens = dlens + m;
3626     for (i=0; i<m; i++) {
3627       jend = ii[i+1] - ii[i];
3628       olen = 0;
3629       dlen = 0;
3630       for (j=0; j<jend; j++) {
3631         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3632         else dlen++;
3633         jj++;
3634       }
3635       olens[i] = olen;
3636       dlens[i] = dlen;
3637     }
3638 
3639     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3640     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3641 
3642     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3643     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3644     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3645     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3646     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3647     ierr = PetscFree(dlens);CHKERRQ(ierr);
3648 
3649   } else { /* call == MAT_REUSE_MATRIX */
3650     M    = *newmat;
3651     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3652     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3653     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3654     /*
3655          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3656        rather than the slower MatSetValues().
3657     */
3658     M->was_assembled = PETSC_TRUE;
3659     M->assembled     = PETSC_FALSE;
3660   }
3661 
3662   /* (5) Set values of Msub to *newmat */
3663   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3664   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3665 
3666   jj   = aij->j;
3667   aa   = aij->a;
3668   for (i=0; i<m; i++) {
3669     row = rstart + i;
3670     nz  = ii[i+1] - ii[i];
3671     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3672     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3673     jj += nz; aa += nz;
3674   }
3675   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3676 
3677   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3679 
3680   ierr = PetscFree(colsub);CHKERRQ(ierr);
3681 
3682   /* save Msub, iscol_sub and iscmap used in processor for next request */
3683   if (call ==  MAT_INITIAL_MATRIX) {
3684     *newmat = M;
3685     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3686     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3687 
3688     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3689     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3690 
3691     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3692     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3693 
3694     if (iscol_local) {
3695       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3696       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3697     }
3698   }
3699   PetscFunctionReturn(0);
3700 }
3701 
3702 /*
3703     Not great since it makes two copies of the submatrix, first an SeqAIJ
3704   in local and then by concatenating the local matrices the end result.
3705   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3706 
3707   Note: This requires a sequential iscol with all indices.
3708 */
3709 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3710 {
3711   PetscErrorCode ierr;
3712   PetscMPIInt    rank,size;
3713   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3714   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3715   Mat            M,Mreuse;
3716   MatScalar      *aa,*vwork;
3717   MPI_Comm       comm;
3718   Mat_SeqAIJ     *aij;
3719   PetscBool      colflag,allcolumns=PETSC_FALSE;
3720 
3721   PetscFunctionBegin;
3722   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3723   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3724   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3725 
3726   /* Check for special case: each processor gets entire matrix columns */
3727   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3728   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3729   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3730   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3731 
3732   if (call ==  MAT_REUSE_MATRIX) {
3733     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3734     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3735     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3736   } else {
3737     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3738   }
3739 
3740   /*
3741       m - number of local rows
3742       n - number of columns (same on all processors)
3743       rstart - first row in new global matrix generated
3744   */
3745   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3746   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3747   if (call == MAT_INITIAL_MATRIX) {
3748     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3749     ii  = aij->i;
3750     jj  = aij->j;
3751 
3752     /*
3753         Determine the number of non-zeros in the diagonal and off-diagonal
3754         portions of the matrix in order to do correct preallocation
3755     */
3756 
3757     /* first get start and end of "diagonal" columns */
3758     if (csize == PETSC_DECIDE) {
3759       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3760       if (mglobal == n) { /* square matrix */
3761         nlocal = m;
3762       } else {
3763         nlocal = n/size + ((n % size) > rank);
3764       }
3765     } else {
3766       nlocal = csize;
3767     }
3768     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3769     rstart = rend - nlocal;
3770     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3771 
3772     /* next, compute all the lengths */
3773     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3774     olens = dlens + m;
3775     for (i=0; i<m; i++) {
3776       jend = ii[i+1] - ii[i];
3777       olen = 0;
3778       dlen = 0;
3779       for (j=0; j<jend; j++) {
3780         if (*jj < rstart || *jj >= rend) olen++;
3781         else dlen++;
3782         jj++;
3783       }
3784       olens[i] = olen;
3785       dlens[i] = dlen;
3786     }
3787     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3788     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3789     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3790     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3791     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3792     ierr = PetscFree(dlens);CHKERRQ(ierr);
3793   } else {
3794     PetscInt ml,nl;
3795 
3796     M    = *newmat;
3797     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3798     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3799     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3800     /*
3801          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3802        rather than the slower MatSetValues().
3803     */
3804     M->was_assembled = PETSC_TRUE;
3805     M->assembled     = PETSC_FALSE;
3806   }
3807   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3808   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3809   ii   = aij->i;
3810   jj   = aij->j;
3811   aa   = aij->a;
3812   for (i=0; i<m; i++) {
3813     row   = rstart + i;
3814     nz    = ii[i+1] - ii[i];
3815     cwork = jj;     jj += nz;
3816     vwork = aa;     aa += nz;
3817     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3818   }
3819 
3820   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3821   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   *newmat = M;
3823 
3824   /* save submatrix used in processor for next request */
3825   if (call ==  MAT_INITIAL_MATRIX) {
3826     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3827     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3828   }
3829   PetscFunctionReturn(0);
3830 }
3831 
3832 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3833 {
3834   PetscInt       m,cstart, cend,j,nnz,i,d;
3835   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3836   const PetscInt *JJ;
3837   PetscErrorCode ierr;
3838   PetscBool      nooffprocentries;
3839 
3840   PetscFunctionBegin;
3841   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3842 
3843   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3844   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3845   m      = B->rmap->n;
3846   cstart = B->cmap->rstart;
3847   cend   = B->cmap->rend;
3848   rstart = B->rmap->rstart;
3849 
3850   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3851 
3852   if (PetscDefined(USE_DEBUG)) {
3853     for (i=0; i<m; i++) {
3854       nnz = Ii[i+1]- Ii[i];
3855       JJ  = J + Ii[i];
3856       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3857       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3858       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3859     }
3860   }
3861 
3862   for (i=0; i<m; i++) {
3863     nnz     = Ii[i+1]- Ii[i];
3864     JJ      = J + Ii[i];
3865     nnz_max = PetscMax(nnz_max,nnz);
3866     d       = 0;
3867     for (j=0; j<nnz; j++) {
3868       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3869     }
3870     d_nnz[i] = d;
3871     o_nnz[i] = nnz - d;
3872   }
3873   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3874   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3875 
3876   for (i=0; i<m; i++) {
3877     ii   = i + rstart;
3878     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3879   }
3880   nooffprocentries    = B->nooffprocentries;
3881   B->nooffprocentries = PETSC_TRUE;
3882   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   B->nooffprocentries = nooffprocentries;
3885 
3886   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 /*@
3891    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3892    (the default parallel PETSc format).
3893 
3894    Collective
3895 
3896    Input Parameters:
3897 +  B - the matrix
3898 .  i - the indices into j for the start of each local row (starts with zero)
3899 .  j - the column indices for each local row (starts with zero)
3900 -  v - optional values in the matrix
3901 
3902    Level: developer
3903 
3904    Notes:
3905        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3906      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3907      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3908 
3909        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3910 
3911        The format which is used for the sparse matrix input, is equivalent to a
3912     row-major ordering.. i.e for the following matrix, the input data expected is
3913     as shown
3914 
3915 $        1 0 0
3916 $        2 0 3     P0
3917 $       -------
3918 $        4 5 6     P1
3919 $
3920 $     Process0 [P0]: rows_owned=[0,1]
3921 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3922 $        j =  {0,0,2}  [size = 3]
3923 $        v =  {1,2,3}  [size = 3]
3924 $
3925 $     Process1 [P1]: rows_owned=[2]
3926 $        i =  {0,3}    [size = nrow+1  = 1+1]
3927 $        j =  {0,1,2}  [size = 3]
3928 $        v =  {4,5,6}  [size = 3]
3929 
3930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3931           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3932 @*/
3933 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3934 {
3935   PetscErrorCode ierr;
3936 
3937   PetscFunctionBegin;
3938   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3939   PetscFunctionReturn(0);
3940 }
3941 
3942 /*@C
3943    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3944    (the default parallel PETSc format).  For good matrix assembly performance
3945    the user should preallocate the matrix storage by setting the parameters
3946    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3947    performance can be increased by more than a factor of 50.
3948 
3949    Collective
3950 
3951    Input Parameters:
3952 +  B - the matrix
3953 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3954            (same value is used for all local rows)
3955 .  d_nnz - array containing the number of nonzeros in the various rows of the
3956            DIAGONAL portion of the local submatrix (possibly different for each row)
3957            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3958            The size of this array is equal to the number of local rows, i.e 'm'.
3959            For matrices that will be factored, you must leave room for (and set)
3960            the diagonal entry even if it is zero.
3961 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3962            submatrix (same value is used for all local rows).
3963 -  o_nnz - array containing the number of nonzeros in the various rows of the
3964            OFF-DIAGONAL portion of the local submatrix (possibly different for
3965            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3966            structure. The size of this array is equal to the number
3967            of local rows, i.e 'm'.
3968 
3969    If the *_nnz parameter is given then the *_nz parameter is ignored
3970 
3971    The AIJ format (also called the Yale sparse matrix format or
3972    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3973    storage.  The stored row and column indices begin with zero.
3974    See Users-Manual: ch_mat for details.
3975 
3976    The parallel matrix is partitioned such that the first m0 rows belong to
3977    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3978    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3979 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3989 
3990    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3991 
3992    You can call MatGetInfo() to get information on how effective the preallocation was;
3993    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3994    You can also run with the option -info and look for messages with the string
3995    malloc in them to see if additional memory allocation was needed.
3996 
3997    Example usage:
3998 
3999    Consider the following 8x8 matrix with 34 non-zero values, that is
4000    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4001    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4002    as follows:
4003 
4004 .vb
4005             1  2  0  |  0  3  0  |  0  4
4006     Proc0   0  5  6  |  7  0  0  |  8  0
4007             9  0 10  | 11  0  0  | 12  0
4008     -------------------------------------
4009            13  0 14  | 15 16 17  |  0  0
4010     Proc1   0 18  0  | 19 20 21  |  0  0
4011             0  0  0  | 22 23  0  | 24  0
4012     -------------------------------------
4013     Proc2  25 26 27  |  0  0 28  | 29  0
4014            30  0  0  | 31 32 33  |  0 34
4015 .ve
4016 
4017    This can be represented as a collection of submatrices as:
4018 
4019 .vb
4020       A B C
4021       D E F
4022       G H I
4023 .ve
4024 
4025    Where the submatrices A,B,C are owned by proc0, D,E,F are
4026    owned by proc1, G,H,I are owned by proc2.
4027 
4028    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4029    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4030    The 'M','N' parameters are 8,8, and have the same values on all procs.
4031 
4032    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4033    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4034    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4038 
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
4045 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4049 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4054 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
4058 .vb
4059      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4060      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4061      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4062 .ve
4063    Here the space allocated is sum of all the above values i.e 34, and
4064    hence pre-allocation is perfect.
4065 
4066    Level: intermediate
4067 
4068 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4069           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4070 @*/
4071 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4072 {
4073   PetscErrorCode ierr;
4074 
4075   PetscFunctionBegin;
4076   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4077   PetscValidType(B,1);
4078   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4079   PetscFunctionReturn(0);
4080 }
4081 
4082 /*@
4083      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4084          CSR format for the local rows.
4085 
4086    Collective
4087 
4088    Input Parameters:
4089 +  comm - MPI communicator
4090 .  m - number of local rows (Cannot be PETSC_DECIDE)
4091 .  n - This value should be the same as the local size used in creating the
4092        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4093        calculated if N is given) For square matrices n is almost always m.
4094 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4095 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4096 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4097 .   j - column indices
4098 -   a - matrix values
4099 
4100    Output Parameter:
4101 .   mat - the matrix
4102 
4103    Level: intermediate
4104 
4105    Notes:
4106        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4107      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4108      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4109 
4110        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4111 
4112        The format which is used for the sparse matrix input, is equivalent to a
4113     row-major ordering.. i.e for the following matrix, the input data expected is
4114     as shown
4115 
4116        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4117 
4118 $        1 0 0
4119 $        2 0 3     P0
4120 $       -------
4121 $        4 5 6     P1
4122 $
4123 $     Process0 [P0]: rows_owned=[0,1]
4124 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4125 $        j =  {0,0,2}  [size = 3]
4126 $        v =  {1,2,3}  [size = 3]
4127 $
4128 $     Process1 [P1]: rows_owned=[2]
4129 $        i =  {0,3}    [size = nrow+1  = 1+1]
4130 $        j =  {0,1,2}  [size = 3]
4131 $        v =  {4,5,6}  [size = 3]
4132 
4133 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4134           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4135 @*/
4136 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4137 {
4138   PetscErrorCode ierr;
4139 
4140   PetscFunctionBegin;
4141   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4142   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4143   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4144   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4145   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4146   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4147   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4148   PetscFunctionReturn(0);
4149 }
4150 
4151 /*@
4152      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4153          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4154 
4155    Collective
4156 
4157    Input Parameters:
4158 +  mat - the matrix
4159 .  m - number of local rows (Cannot be PETSC_DECIDE)
4160 .  n - This value should be the same as the local size used in creating the
4161        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4162        calculated if N is given) For square matrices n is almost always m.
4163 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4164 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4165 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4166 .  J - column indices
4167 -  v - matrix values
4168 
4169    Level: intermediate
4170 
4171 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4172           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4173 @*/
4174 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4175 {
4176   PetscErrorCode ierr;
4177   PetscInt       cstart,nnz,i,j;
4178   PetscInt       *ld;
4179   PetscBool      nooffprocentries;
4180   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4181   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4182   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4183   const PetscInt *Adi = Ad->i;
4184   PetscInt       ldi,Iii,md;
4185 
4186   PetscFunctionBegin;
4187   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4188   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4189   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4190   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4191 
4192   cstart = mat->cmap->rstart;
4193   if (!Aij->ld) {
4194     /* count number of entries below block diagonal */
4195     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4196     Aij->ld = ld;
4197     for (i=0; i<m; i++) {
4198       nnz  = Ii[i+1]- Ii[i];
4199       j     = 0;
4200       while  (J[j] < cstart && j < nnz) {j++;}
4201       J    += nnz;
4202       ld[i] = j;
4203     }
4204   } else {
4205     ld = Aij->ld;
4206   }
4207 
4208   for (i=0; i<m; i++) {
4209     nnz  = Ii[i+1]- Ii[i];
4210     Iii  = Ii[i];
4211     ldi  = ld[i];
4212     md   = Adi[i+1]-Adi[i];
4213     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4214     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4215     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4216     ad  += md;
4217     ao  += nnz - md;
4218   }
4219   nooffprocentries      = mat->nooffprocentries;
4220   mat->nooffprocentries = PETSC_TRUE;
4221   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4222   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4223   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4224   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4225   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4226   mat->nooffprocentries = nooffprocentries;
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 /*@C
4231    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4232    (the default parallel PETSc format).  For good matrix assembly performance
4233    the user should preallocate the matrix storage by setting the parameters
4234    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4235    performance can be increased by more than a factor of 50.
4236 
4237    Collective
4238 
4239    Input Parameters:
4240 +  comm - MPI communicator
4241 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4242            This value should be the same as the local size used in creating the
4243            y vector for the matrix-vector product y = Ax.
4244 .  n - This value should be the same as the local size used in creating the
4245        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4246        calculated if N is given) For square matrices n is almost always m.
4247 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4248 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4249 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4250            (same value is used for all local rows)
4251 .  d_nnz - array containing the number of nonzeros in the various rows of the
4252            DIAGONAL portion of the local submatrix (possibly different for each row)
4253            or NULL, if d_nz is used to specify the nonzero structure.
4254            The size of this array is equal to the number of local rows, i.e 'm'.
4255 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4256            submatrix (same value is used for all local rows).
4257 -  o_nnz - array containing the number of nonzeros in the various rows of the
4258            OFF-DIAGONAL portion of the local submatrix (possibly different for
4259            each row) or NULL, if o_nz is used to specify the nonzero
4260            structure. The size of this array is equal to the number
4261            of local rows, i.e 'm'.
4262 
4263    Output Parameter:
4264 .  A - the matrix
4265 
4266    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4267    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4268    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4269 
4270    Notes:
4271    If the *_nnz parameter is given then the *_nz parameter is ignored
4272 
4273    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4274    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4275    storage requirements for this matrix.
4276 
4277    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4278    processor then it must be used on all processors that share the object for
4279    that argument.
4280 
4281    The user MUST specify either the local or global matrix dimensions
4282    (possibly both).
4283 
4284    The parallel matrix is partitioned across processors such that the
4285    first m0 rows belong to process 0, the next m1 rows belong to
4286    process 1, the next m2 rows belong to process 2 etc.. where
4287    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4288    values corresponding to [m x N] submatrix.
4289 
4290    The columns are logically partitioned with the n0 columns belonging
4291    to 0th partition, the next n1 columns belonging to the next
4292    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4293 
4294    The DIAGONAL portion of the local submatrix on any given processor
4295    is the submatrix corresponding to the rows and columns m,n
4296    corresponding to the given processor. i.e diagonal matrix on
4297    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4298    etc. The remaining portion of the local submatrix [m x (N-n)]
4299    constitute the OFF-DIAGONAL portion. The example below better
4300    illustrates this concept.
4301 
4302    For a square global matrix we define each processor's diagonal portion
4303    to be its local rows and the corresponding columns (a square submatrix);
4304    each processor's off-diagonal portion encompasses the remainder of the
4305    local matrix (a rectangular submatrix).
4306 
4307    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4308 
4309    When calling this routine with a single process communicator, a matrix of
4310    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4311    type of communicator, use the construction mechanism
4312 .vb
4313      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4314 .ve
4315 
4316 $     MatCreate(...,&A);
4317 $     MatSetType(A,MATMPIAIJ);
4318 $     MatSetSizes(A, m,n,M,N);
4319 $     MatMPIAIJSetPreallocation(A,...);
4320 
4321    By default, this format uses inodes (identical nodes) when possible.
4322    We search for consecutive rows with the same nonzero structure, thereby
4323    reusing matrix information to achieve increased efficiency.
4324 
4325    Options Database Keys:
4326 +  -mat_no_inode  - Do not use inodes
4327 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4328 
4329 
4330 
4331    Example usage:
4332 
4333    Consider the following 8x8 matrix with 34 non-zero values, that is
4334    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4335    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4336    as follows
4337 
4338 .vb
4339             1  2  0  |  0  3  0  |  0  4
4340     Proc0   0  5  6  |  7  0  0  |  8  0
4341             9  0 10  | 11  0  0  | 12  0
4342     -------------------------------------
4343            13  0 14  | 15 16 17  |  0  0
4344     Proc1   0 18  0  | 19 20 21  |  0  0
4345             0  0  0  | 22 23  0  | 24  0
4346     -------------------------------------
4347     Proc2  25 26 27  |  0  0 28  | 29  0
4348            30  0  0  | 31 32 33  |  0 34
4349 .ve
4350 
4351    This can be represented as a collection of submatrices as
4352 
4353 .vb
4354       A B C
4355       D E F
4356       G H I
4357 .ve
4358 
4359    Where the submatrices A,B,C are owned by proc0, D,E,F are
4360    owned by proc1, G,H,I are owned by proc2.
4361 
4362    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4363    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4364    The 'M','N' parameters are 8,8, and have the same values on all procs.
4365 
4366    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4367    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4368    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4369    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4370    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4371    matrix, and [DF] as another SeqAIJ matrix.
4372 
4373    When d_nz, o_nz parameters are specified, d_nz storage elements are
4374    allocated for every row of the local diagonal submatrix, and o_nz
4375    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4376    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4377    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4378    In this case, the values of d_nz,o_nz are
4379 .vb
4380      proc0 : d_nz = 2, o_nz = 2
4381      proc1 : d_nz = 3, o_nz = 2
4382      proc2 : d_nz = 1, o_nz = 4
4383 .ve
4384    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4385    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4386    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4387    34 values.
4388 
4389    When d_nnz, o_nnz parameters are specified, the storage is specified
4390    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4391    In the above case the values for d_nnz,o_nnz are
4392 .vb
4393      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4394      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4395      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4396 .ve
4397    Here the space allocated is sum of all the above values i.e 34, and
4398    hence pre-allocation is perfect.
4399 
4400    Level: intermediate
4401 
4402 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4403           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4404 @*/
4405 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4406 {
4407   PetscErrorCode ierr;
4408   PetscMPIInt    size;
4409 
4410   PetscFunctionBegin;
4411   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4412   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4413   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4414   if (size > 1) {
4415     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4416     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4417   } else {
4418     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4419     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4420   }
4421   PetscFunctionReturn(0);
4422 }
4423 
4424 /*@C
4425   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4426 
4427   Not collective
4428 
4429   Input Parameter:
4430 . A - The MPIAIJ matrix
4431 
4432   Output Parameters:
4433 + Ad - The local diagonal block as a SeqAIJ matrix
4434 . Ao - The local off-diagonal block as a SeqAIJ matrix
4435 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4436 
4437   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4438   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4439   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4440   local column numbers to global column numbers in the original matrix.
4441 
4442   Level: intermediate
4443 
4444 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4445 @*/
4446 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4447 {
4448   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4449   PetscBool      flg;
4450   PetscErrorCode ierr;
4451 
4452   PetscFunctionBegin;
4453   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4454   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4455   if (Ad)     *Ad     = a->A;
4456   if (Ao)     *Ao     = a->B;
4457   if (colmap) *colmap = a->garray;
4458   PetscFunctionReturn(0);
4459 }
4460 
4461 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4462 {
4463   PetscErrorCode ierr;
4464   PetscInt       m,N,i,rstart,nnz,Ii;
4465   PetscInt       *indx;
4466   PetscScalar    *values;
4467 
4468   PetscFunctionBegin;
4469   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4470   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4471     PetscInt       *dnz,*onz,sum,bs,cbs;
4472 
4473     if (n == PETSC_DECIDE) {
4474       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4475     }
4476     /* Check sum(n) = N */
4477     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4478     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4479 
4480     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4481     rstart -= m;
4482 
4483     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4484     for (i=0; i<m; i++) {
4485       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4486       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4487       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4488     }
4489 
4490     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4491     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4492     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4493     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4494     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4495     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4496     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4497     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4498   }
4499 
4500   /* numeric phase */
4501   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4502   for (i=0; i<m; i++) {
4503     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4504     Ii   = i + rstart;
4505     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4506     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4507   }
4508   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4509   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4514 {
4515   PetscErrorCode    ierr;
4516   PetscMPIInt       rank;
4517   PetscInt          m,N,i,rstart,nnz;
4518   size_t            len;
4519   const PetscInt    *indx;
4520   PetscViewer       out;
4521   char              *name;
4522   Mat               B;
4523   const PetscScalar *values;
4524 
4525   PetscFunctionBegin;
4526   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4527   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4528   /* Should this be the type of the diagonal block of A? */
4529   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4530   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4531   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4532   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4533   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4534   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4535   for (i=0; i<m; i++) {
4536     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4537     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4538     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4539   }
4540   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4541   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4542 
4543   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4544   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4545   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4546   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4547   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4548   ierr = PetscFree(name);CHKERRQ(ierr);
4549   ierr = MatView(B,out);CHKERRQ(ierr);
4550   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4551   ierr = MatDestroy(&B);CHKERRQ(ierr);
4552   PetscFunctionReturn(0);
4553 }
4554 
4555 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4556 {
4557   PetscErrorCode      ierr;
4558   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4559 
4560   PetscFunctionBegin;
4561   if (!merge) PetscFunctionReturn(0);
4562   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4563   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4564   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4565   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4566   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4567   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4568   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4569   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4570   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4571   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4572   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4573   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4574   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4575   ierr = PetscFree(merge);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
4578 
4579 #include <../src/mat/utils/freespace.h>
4580 #include <petscbt.h>
4581 
4582 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4583 {
4584   PetscErrorCode      ierr;
4585   MPI_Comm            comm;
4586   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4587   PetscMPIInt         size,rank,taga,*len_s;
4588   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4589   PetscInt            proc,m;
4590   PetscInt            **buf_ri,**buf_rj;
4591   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4592   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4593   MPI_Request         *s_waits,*r_waits;
4594   MPI_Status          *status;
4595   MatScalar           *aa=a->a;
4596   MatScalar           **abuf_r,*ba_i;
4597   Mat_Merge_SeqsToMPI *merge;
4598   PetscContainer      container;
4599 
4600   PetscFunctionBegin;
4601   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4602   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4603 
4604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4605   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4606 
4607   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4608   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4609   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4610 
4611   bi     = merge->bi;
4612   bj     = merge->bj;
4613   buf_ri = merge->buf_ri;
4614   buf_rj = merge->buf_rj;
4615 
4616   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4617   owners = merge->rowmap->range;
4618   len_s  = merge->len_s;
4619 
4620   /* send and recv matrix values */
4621   /*-----------------------------*/
4622   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4623   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4624 
4625   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4626   for (proc=0,k=0; proc<size; proc++) {
4627     if (!len_s[proc]) continue;
4628     i    = owners[proc];
4629     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4630     k++;
4631   }
4632 
4633   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4634   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4635   ierr = PetscFree(status);CHKERRQ(ierr);
4636 
4637   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4638   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4639 
4640   /* insert mat values of mpimat */
4641   /*----------------------------*/
4642   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4643   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4644 
4645   for (k=0; k<merge->nrecv; k++) {
4646     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4647     nrows       = *(buf_ri_k[k]);
4648     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4649     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4650   }
4651 
4652   /* set values of ba */
4653   m = merge->rowmap->n;
4654   for (i=0; i<m; i++) {
4655     arow = owners[rank] + i;
4656     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4657     bnzi = bi[i+1] - bi[i];
4658     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4659 
4660     /* add local non-zero vals of this proc's seqmat into ba */
4661     anzi   = ai[arow+1] - ai[arow];
4662     aj     = a->j + ai[arow];
4663     aa     = a->a + ai[arow];
4664     nextaj = 0;
4665     for (j=0; nextaj<anzi; j++) {
4666       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4667         ba_i[j] += aa[nextaj++];
4668       }
4669     }
4670 
4671     /* add received vals into ba */
4672     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4673       /* i-th row */
4674       if (i == *nextrow[k]) {
4675         anzi   = *(nextai[k]+1) - *nextai[k];
4676         aj     = buf_rj[k] + *(nextai[k]);
4677         aa     = abuf_r[k] + *(nextai[k]);
4678         nextaj = 0;
4679         for (j=0; nextaj<anzi; j++) {
4680           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4681             ba_i[j] += aa[nextaj++];
4682           }
4683         }
4684         nextrow[k]++; nextai[k]++;
4685       }
4686     }
4687     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4688   }
4689   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4690   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4691 
4692   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4693   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4694   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4695   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4696   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4697   PetscFunctionReturn(0);
4698 }
4699 
4700 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4701 {
4702   PetscErrorCode      ierr;
4703   Mat                 B_mpi;
4704   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4705   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4706   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4707   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4708   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4709   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4710   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4711   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4712   MPI_Status          *status;
4713   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4714   PetscBT             lnkbt;
4715   Mat_Merge_SeqsToMPI *merge;
4716   PetscContainer      container;
4717 
4718   PetscFunctionBegin;
4719   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4720 
4721   /* make sure it is a PETSc comm */
4722   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4723   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4724   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4725 
4726   ierr = PetscNew(&merge);CHKERRQ(ierr);
4727   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4728 
4729   /* determine row ownership */
4730   /*---------------------------------------------------------*/
4731   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4732   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4733   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4734   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4735   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4736   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4737   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4738 
4739   m      = merge->rowmap->n;
4740   owners = merge->rowmap->range;
4741 
4742   /* determine the number of messages to send, their lengths */
4743   /*---------------------------------------------------------*/
4744   len_s = merge->len_s;
4745 
4746   len          = 0; /* length of buf_si[] */
4747   merge->nsend = 0;
4748   for (proc=0; proc<size; proc++) {
4749     len_si[proc] = 0;
4750     if (proc == rank) {
4751       len_s[proc] = 0;
4752     } else {
4753       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4754       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4755     }
4756     if (len_s[proc]) {
4757       merge->nsend++;
4758       nrows = 0;
4759       for (i=owners[proc]; i<owners[proc+1]; i++) {
4760         if (ai[i+1] > ai[i]) nrows++;
4761       }
4762       len_si[proc] = 2*(nrows+1);
4763       len         += len_si[proc];
4764     }
4765   }
4766 
4767   /* determine the number and length of messages to receive for ij-structure */
4768   /*-------------------------------------------------------------------------*/
4769   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4770   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4771 
4772   /* post the Irecv of j-structure */
4773   /*-------------------------------*/
4774   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4775   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4776 
4777   /* post the Isend of j-structure */
4778   /*--------------------------------*/
4779   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4780 
4781   for (proc=0, k=0; proc<size; proc++) {
4782     if (!len_s[proc]) continue;
4783     i    = owners[proc];
4784     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4785     k++;
4786   }
4787 
4788   /* receives and sends of j-structure are complete */
4789   /*------------------------------------------------*/
4790   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4791   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4792 
4793   /* send and recv i-structure */
4794   /*---------------------------*/
4795   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4796   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4797 
4798   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4799   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4800   for (proc=0,k=0; proc<size; proc++) {
4801     if (!len_s[proc]) continue;
4802     /* form outgoing message for i-structure:
4803          buf_si[0]:                 nrows to be sent
4804                [1:nrows]:           row index (global)
4805                [nrows+1:2*nrows+1]: i-structure index
4806     */
4807     /*-------------------------------------------*/
4808     nrows       = len_si[proc]/2 - 1;
4809     buf_si_i    = buf_si + nrows+1;
4810     buf_si[0]   = nrows;
4811     buf_si_i[0] = 0;
4812     nrows       = 0;
4813     for (i=owners[proc]; i<owners[proc+1]; i++) {
4814       anzi = ai[i+1] - ai[i];
4815       if (anzi) {
4816         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4817         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4818         nrows++;
4819       }
4820     }
4821     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4822     k++;
4823     buf_si += len_si[proc];
4824   }
4825 
4826   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4827   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4828 
4829   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4830   for (i=0; i<merge->nrecv; i++) {
4831     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4832   }
4833 
4834   ierr = PetscFree(len_si);CHKERRQ(ierr);
4835   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4836   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4837   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4838   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4839   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4840   ierr = PetscFree(status);CHKERRQ(ierr);
4841 
4842   /* compute a local seq matrix in each processor */
4843   /*----------------------------------------------*/
4844   /* allocate bi array and free space for accumulating nonzero column info */
4845   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4846   bi[0] = 0;
4847 
4848   /* create and initialize a linked list */
4849   nlnk = N+1;
4850   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4851 
4852   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4853   len  = ai[owners[rank+1]] - ai[owners[rank]];
4854   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4855 
4856   current_space = free_space;
4857 
4858   /* determine symbolic info for each local row */
4859   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4860 
4861   for (k=0; k<merge->nrecv; k++) {
4862     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4863     nrows       = *buf_ri_k[k];
4864     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4865     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4866   }
4867 
4868   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4869   len  = 0;
4870   for (i=0; i<m; i++) {
4871     bnzi = 0;
4872     /* add local non-zero cols of this proc's seqmat into lnk */
4873     arow  = owners[rank] + i;
4874     anzi  = ai[arow+1] - ai[arow];
4875     aj    = a->j + ai[arow];
4876     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4877     bnzi += nlnk;
4878     /* add received col data into lnk */
4879     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4880       if (i == *nextrow[k]) { /* i-th row */
4881         anzi  = *(nextai[k]+1) - *nextai[k];
4882         aj    = buf_rj[k] + *nextai[k];
4883         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4884         bnzi += nlnk;
4885         nextrow[k]++; nextai[k]++;
4886       }
4887     }
4888     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4889 
4890     /* if free space is not available, make more free space */
4891     if (current_space->local_remaining<bnzi) {
4892       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4893       nspacedouble++;
4894     }
4895     /* copy data into free space, then initialize lnk */
4896     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4897     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4898 
4899     current_space->array           += bnzi;
4900     current_space->local_used      += bnzi;
4901     current_space->local_remaining -= bnzi;
4902 
4903     bi[i+1] = bi[i] + bnzi;
4904   }
4905 
4906   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4907 
4908   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4909   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4910   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4911 
4912   /* create symbolic parallel matrix B_mpi */
4913   /*---------------------------------------*/
4914   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4915   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4916   if (n==PETSC_DECIDE) {
4917     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4918   } else {
4919     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4920   }
4921   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4922   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4923   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4924   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4925   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4926 
4927   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4928   B_mpi->assembled  = PETSC_FALSE;
4929   merge->bi         = bi;
4930   merge->bj         = bj;
4931   merge->buf_ri     = buf_ri;
4932   merge->buf_rj     = buf_rj;
4933   merge->coi        = NULL;
4934   merge->coj        = NULL;
4935   merge->owners_co  = NULL;
4936 
4937   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4938 
4939   /* attach the supporting struct to B_mpi for reuse */
4940   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4941   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4942   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4943   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4944   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4945   *mpimat = B_mpi;
4946 
4947   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4948   PetscFunctionReturn(0);
4949 }
4950 
4951 /*@C
4952       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4953                  matrices from each processor
4954 
4955     Collective
4956 
4957    Input Parameters:
4958 +    comm - the communicators the parallel matrix will live on
4959 .    seqmat - the input sequential matrices
4960 .    m - number of local rows (or PETSC_DECIDE)
4961 .    n - number of local columns (or PETSC_DECIDE)
4962 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4963 
4964    Output Parameter:
4965 .    mpimat - the parallel matrix generated
4966 
4967     Level: advanced
4968 
4969    Notes:
4970      The dimensions of the sequential matrix in each processor MUST be the same.
4971      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4972      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4973 @*/
4974 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4975 {
4976   PetscErrorCode ierr;
4977   PetscMPIInt    size;
4978 
4979   PetscFunctionBegin;
4980   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4981   if (size == 1) {
4982     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4983     if (scall == MAT_INITIAL_MATRIX) {
4984       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4985     } else {
4986       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4987     }
4988     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4989     PetscFunctionReturn(0);
4990   }
4991   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4992   if (scall == MAT_INITIAL_MATRIX) {
4993     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4994   }
4995   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4996   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997   PetscFunctionReturn(0);
4998 }
4999 
5000 /*@
5001      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5002           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5003           with MatGetSize()
5004 
5005     Not Collective
5006 
5007    Input Parameters:
5008 +    A - the matrix
5009 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010 
5011    Output Parameter:
5012 .    A_loc - the local sequential matrix generated
5013 
5014     Level: developer
5015 
5016    Notes:
5017      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5018      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5019      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5020      modify the values of the returned A_loc.
5021 
5022 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5023 
5024 @*/
5025 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5026 {
5027   PetscErrorCode ierr;
5028   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5029   Mat_SeqAIJ     *mat,*a,*b;
5030   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5031   MatScalar      *aa,*ba,*cam;
5032   PetscScalar    *ca;
5033   PetscMPIInt    size;
5034   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5035   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5036   PetscBool      match;
5037 
5038   PetscFunctionBegin;
5039   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5040   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5041   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5042   if (size == 1) {
5043     if (scall == MAT_INITIAL_MATRIX) {
5044       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5045       *A_loc = mpimat->A;
5046     } else if (scall == MAT_REUSE_MATRIX) {
5047       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5048     }
5049     PetscFunctionReturn(0);
5050   }
5051 
5052   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5053   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5054   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5055   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5056   aa = a->a; ba = b->a;
5057   if (scall == MAT_INITIAL_MATRIX) {
5058     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5059     ci[0] = 0;
5060     for (i=0; i<am; i++) {
5061       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5062     }
5063     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5064     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5065     k    = 0;
5066     for (i=0; i<am; i++) {
5067       ncols_o = bi[i+1] - bi[i];
5068       ncols_d = ai[i+1] - ai[i];
5069       /* off-diagonal portion of A */
5070       for (jo=0; jo<ncols_o; jo++) {
5071         col = cmap[*bj];
5072         if (col >= cstart) break;
5073         cj[k]   = col; bj++;
5074         ca[k++] = *ba++;
5075       }
5076       /* diagonal portion of A */
5077       for (j=0; j<ncols_d; j++) {
5078         cj[k]   = cstart + *aj++;
5079         ca[k++] = *aa++;
5080       }
5081       /* off-diagonal portion of A */
5082       for (j=jo; j<ncols_o; j++) {
5083         cj[k]   = cmap[*bj++];
5084         ca[k++] = *ba++;
5085       }
5086     }
5087     /* put together the new matrix */
5088     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5089     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5090     /* Since these are PETSc arrays, change flags to free them as necessary. */
5091     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5092     mat->free_a  = PETSC_TRUE;
5093     mat->free_ij = PETSC_TRUE;
5094     mat->nonew   = 0;
5095   } else if (scall == MAT_REUSE_MATRIX) {
5096     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5097     ci = mat->i; cj = mat->j; cam = mat->a;
5098     for (i=0; i<am; i++) {
5099       /* off-diagonal portion of A */
5100       ncols_o = bi[i+1] - bi[i];
5101       for (jo=0; jo<ncols_o; jo++) {
5102         col = cmap[*bj];
5103         if (col >= cstart) break;
5104         *cam++ = *ba++; bj++;
5105       }
5106       /* diagonal portion of A */
5107       ncols_d = ai[i+1] - ai[i];
5108       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5109       /* off-diagonal portion of A */
5110       for (j=jo; j<ncols_o; j++) {
5111         *cam++ = *ba++; bj++;
5112       }
5113     }
5114   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5115   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5116   PetscFunctionReturn(0);
5117 }
5118 
5119 /*@C
5120      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5121 
5122     Not Collective
5123 
5124    Input Parameters:
5125 +    A - the matrix
5126 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5127 -    row, col - index sets of rows and columns to extract (or NULL)
5128 
5129    Output Parameter:
5130 .    A_loc - the local sequential matrix generated
5131 
5132     Level: developer
5133 
5134 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5135 
5136 @*/
5137 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5138 {
5139   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5140   PetscErrorCode ierr;
5141   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5142   IS             isrowa,iscola;
5143   Mat            *aloc;
5144   PetscBool      match;
5145 
5146   PetscFunctionBegin;
5147   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5148   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5149   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5150   if (!row) {
5151     start = A->rmap->rstart; end = A->rmap->rend;
5152     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5153   } else {
5154     isrowa = *row;
5155   }
5156   if (!col) {
5157     start = A->cmap->rstart;
5158     cmap  = a->garray;
5159     nzA   = a->A->cmap->n;
5160     nzB   = a->B->cmap->n;
5161     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5162     ncols = 0;
5163     for (i=0; i<nzB; i++) {
5164       if (cmap[i] < start) idx[ncols++] = cmap[i];
5165       else break;
5166     }
5167     imark = i;
5168     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5169     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5170     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5171   } else {
5172     iscola = *col;
5173   }
5174   if (scall != MAT_INITIAL_MATRIX) {
5175     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5176     aloc[0] = *A_loc;
5177   }
5178   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5179   if (!col) { /* attach global id of condensed columns */
5180     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5181   }
5182   *A_loc = aloc[0];
5183   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5184   if (!row) {
5185     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5186   }
5187   if (!col) {
5188     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5189   }
5190   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5191   PetscFunctionReturn(0);
5192 }
5193 
5194 /*
5195  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5196  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5197  * on a global size.
5198  * */
5199 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5200 {
5201   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5202   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5203   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5204   PetscMPIInt              owner;
5205   PetscSFNode              *iremote,*oiremote;
5206   const PetscInt           *lrowindices;
5207   PetscErrorCode           ierr;
5208   PetscSF                  sf,osf;
5209   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5210   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5211   MPI_Comm                 comm;
5212   ISLocalToGlobalMapping   mapping;
5213 
5214   PetscFunctionBegin;
5215   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5216   /* plocalsize is the number of roots
5217    * nrows is the number of leaves
5218    * */
5219   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5220   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5221   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5222   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5223   for (i=0;i<nrows;i++) {
5224     /* Find a remote index and an owner for a row
5225      * The row could be local or remote
5226      * */
5227     owner = 0;
5228     lidx  = 0;
5229     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5230     iremote[i].index = lidx;
5231     iremote[i].rank  = owner;
5232   }
5233   /* Create SF to communicate how many nonzero columns for each row */
5234   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5235   /* SF will figure out the number of nonzero colunms for each row, and their
5236    * offsets
5237    * */
5238   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5239   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5240   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5241 
5242   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5243   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5244   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5245   roffsets[0] = 0;
5246   roffsets[1] = 0;
5247   for (i=0;i<plocalsize;i++) {
5248     /* diag */
5249     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5250     /* off diag */
5251     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5252     /* compute offsets so that we relative location for each row */
5253     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5254     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5255   }
5256   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5257   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5258   /* 'r' means root, and 'l' means leaf */
5259   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5260   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5261   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5262   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5263   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5264   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5265   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5266   dntotalcols = 0;
5267   ontotalcols = 0;
5268   ncol = 0;
5269   for (i=0;i<nrows;i++) {
5270     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5271     ncol = PetscMax(pnnz[i],ncol);
5272     /* diag */
5273     dntotalcols += nlcols[i*2+0];
5274     /* off diag */
5275     ontotalcols += nlcols[i*2+1];
5276   }
5277   /* We do not need to figure the right number of columns
5278    * since all the calculations will be done by going through the raw data
5279    * */
5280   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5281   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5282   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5283   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5284   /* diag */
5285   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5286   /* off diag */
5287   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5288   /* diag */
5289   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5290   /* off diag */
5291   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5292   dntotalcols = 0;
5293   ontotalcols = 0;
5294   ntotalcols  = 0;
5295   for (i=0;i<nrows;i++) {
5296     owner = 0;
5297     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5298     /* Set iremote for diag matrix */
5299     for (j=0;j<nlcols[i*2+0];j++) {
5300       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5301       iremote[dntotalcols].rank    = owner;
5302       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5303       ilocal[dntotalcols++]        = ntotalcols++;
5304     }
5305     /* off diag */
5306     for (j=0;j<nlcols[i*2+1];j++) {
5307       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5308       oiremote[ontotalcols].rank    = owner;
5309       oilocal[ontotalcols++]        = ntotalcols++;
5310     }
5311   }
5312   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5313   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5314   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5315   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5316   /* P serves as roots and P_oth is leaves
5317    * Diag matrix
5318    * */
5319   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5320   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5321   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5322 
5323   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5324   /* Off diag */
5325   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5326   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5327   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5328   /* We operate on the matrix internal data for saving memory */
5329   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5330   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5331   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5332   /* Convert to global indices for diag matrix */
5333   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5334   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5335   /* We want P_oth store global indices */
5336   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5337   /* Use memory scalable approach */
5338   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5339   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5340   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5341   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5342   /* Convert back to local indices */
5343   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5344   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5345   nout = 0;
5346   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5347   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5348   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5349   /* Exchange values */
5350   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5351   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5352   /* Stop PETSc from shrinking memory */
5353   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5354   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5355   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5356   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5357   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5358   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5359   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5360   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5361   PetscFunctionReturn(0);
5362 }
5363 
5364 /*
5365  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5366  * This supports MPIAIJ and MAIJ
5367  * */
5368 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5369 {
5370   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5371   Mat_SeqAIJ            *p_oth;
5372   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5373   IS                    rows,map;
5374   PetscHMapI            hamp;
5375   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5376   MPI_Comm              comm;
5377   PetscSF               sf,osf;
5378   PetscBool             has;
5379   PetscErrorCode        ierr;
5380 
5381   PetscFunctionBegin;
5382   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5383   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5384   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5385    *  and then create a submatrix (that often is an overlapping matrix)
5386    * */
5387   if (reuse == MAT_INITIAL_MATRIX) {
5388     /* Use a hash table to figure out unique keys */
5389     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5390     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5391     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5392     count = 0;
5393     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5394     for (i=0;i<a->B->cmap->n;i++) {
5395       key  = a->garray[i]/dof;
5396       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5397       if (!has) {
5398         mapping[i] = count;
5399         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5400       } else {
5401         /* Current 'i' has the same value the previous step */
5402         mapping[i] = count-1;
5403       }
5404     }
5405     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5406     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5407     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5408     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5409     off = 0;
5410     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5411     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5412     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5413     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5414     /* In case, the matrix was already created but users want to recreate the matrix */
5415     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5416     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5417     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5418     ierr = ISDestroy(&map);CHKERRQ(ierr);
5419     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5420   } else if (reuse == MAT_REUSE_MATRIX) {
5421     /* If matrix was already created, we simply update values using SF objects
5422      * that as attached to the matrix ealier.
5423      *  */
5424     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5425     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5426     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5427     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5428     /* Update values in place */
5429     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5430     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5431     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5432     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5433   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5434   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5435   PetscFunctionReturn(0);
5436 }
5437 
5438 /*@C
5439     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5440 
5441     Collective on Mat
5442 
5443    Input Parameters:
5444 +    A,B - the matrices in mpiaij format
5445 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5446 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5447 
5448    Output Parameter:
5449 +    rowb, colb - index sets of rows and columns of B to extract
5450 -    B_seq - the sequential matrix generated
5451 
5452     Level: developer
5453 
5454 @*/
5455 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5456 {
5457   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5458   PetscErrorCode ierr;
5459   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5460   IS             isrowb,iscolb;
5461   Mat            *bseq=NULL;
5462 
5463   PetscFunctionBegin;
5464   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5465     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5466   }
5467   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5468 
5469   if (scall == MAT_INITIAL_MATRIX) {
5470     start = A->cmap->rstart;
5471     cmap  = a->garray;
5472     nzA   = a->A->cmap->n;
5473     nzB   = a->B->cmap->n;
5474     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5475     ncols = 0;
5476     for (i=0; i<nzB; i++) {  /* row < local row index */
5477       if (cmap[i] < start) idx[ncols++] = cmap[i];
5478       else break;
5479     }
5480     imark = i;
5481     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5482     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5483     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5484     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5485   } else {
5486     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5487     isrowb  = *rowb; iscolb = *colb;
5488     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5489     bseq[0] = *B_seq;
5490   }
5491   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5492   *B_seq = bseq[0];
5493   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5494   if (!rowb) {
5495     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5496   } else {
5497     *rowb = isrowb;
5498   }
5499   if (!colb) {
5500     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5501   } else {
5502     *colb = iscolb;
5503   }
5504   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5505   PetscFunctionReturn(0);
5506 }
5507 
5508 /*
5509     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5510     of the OFF-DIAGONAL portion of local A
5511 
5512     Collective on Mat
5513 
5514    Input Parameters:
5515 +    A,B - the matrices in mpiaij format
5516 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5517 
5518    Output Parameter:
5519 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5520 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5521 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5522 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5523 
5524     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5525      for this matrix. This is not desirable..
5526 
5527     Level: developer
5528 
5529 */
5530 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5531 {
5532   PetscErrorCode         ierr;
5533   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5534   Mat_SeqAIJ             *b_oth;
5535   VecScatter             ctx;
5536   MPI_Comm               comm;
5537   const PetscMPIInt      *rprocs,*sprocs;
5538   const PetscInt         *srow,*rstarts,*sstarts;
5539   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5540   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5541   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5542   MPI_Request            *rwaits = NULL,*swaits = NULL;
5543   MPI_Status             rstatus;
5544   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5545 
5546   PetscFunctionBegin;
5547   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5548   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5549 
5550   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5551     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5552   }
5553   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5554   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5555 
5556   if (size == 1) {
5557     startsj_s = NULL;
5558     bufa_ptr  = NULL;
5559     *B_oth    = NULL;
5560     PetscFunctionReturn(0);
5561   }
5562 
5563   ctx = a->Mvctx;
5564   tag = ((PetscObject)ctx)->tag;
5565 
5566   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5567   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5568   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5569   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5570   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5571   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5572   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5573 
5574   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5575   if (scall == MAT_INITIAL_MATRIX) {
5576     /* i-array */
5577     /*---------*/
5578     /*  post receives */
5579     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5580     for (i=0; i<nrecvs; i++) {
5581       rowlen = rvalues + rstarts[i]*rbs;
5582       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5583       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5584     }
5585 
5586     /* pack the outgoing message */
5587     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5588 
5589     sstartsj[0] = 0;
5590     rstartsj[0] = 0;
5591     len         = 0; /* total length of j or a array to be sent */
5592     if (nsends) {
5593       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5594       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5595     }
5596     for (i=0; i<nsends; i++) {
5597       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5598       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5599       for (j=0; j<nrows; j++) {
5600         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5601         for (l=0; l<sbs; l++) {
5602           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5603 
5604           rowlen[j*sbs+l] = ncols;
5605 
5606           len += ncols;
5607           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5608         }
5609         k++;
5610       }
5611       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5612 
5613       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5614     }
5615     /* recvs and sends of i-array are completed */
5616     i = nrecvs;
5617     while (i--) {
5618       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5619     }
5620     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5621     ierr = PetscFree(svalues);CHKERRQ(ierr);
5622 
5623     /* allocate buffers for sending j and a arrays */
5624     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5625     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5626 
5627     /* create i-array of B_oth */
5628     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5629 
5630     b_othi[0] = 0;
5631     len       = 0; /* total length of j or a array to be received */
5632     k         = 0;
5633     for (i=0; i<nrecvs; i++) {
5634       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5635       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5636       for (j=0; j<nrows; j++) {
5637         b_othi[k+1] = b_othi[k] + rowlen[j];
5638         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5639         k++;
5640       }
5641       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5642     }
5643     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5644 
5645     /* allocate space for j and a arrrays of B_oth */
5646     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5647     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5648 
5649     /* j-array */
5650     /*---------*/
5651     /*  post receives of j-array */
5652     for (i=0; i<nrecvs; i++) {
5653       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5654       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5655     }
5656 
5657     /* pack the outgoing message j-array */
5658     if (nsends) k = sstarts[0];
5659     for (i=0; i<nsends; i++) {
5660       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5661       bufJ  = bufj+sstartsj[i];
5662       for (j=0; j<nrows; j++) {
5663         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5664         for (ll=0; ll<sbs; ll++) {
5665           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5666           for (l=0; l<ncols; l++) {
5667             *bufJ++ = cols[l];
5668           }
5669           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5670         }
5671       }
5672       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5673     }
5674 
5675     /* recvs and sends of j-array are completed */
5676     i = nrecvs;
5677     while (i--) {
5678       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5679     }
5680     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5681   } else if (scall == MAT_REUSE_MATRIX) {
5682     sstartsj = *startsj_s;
5683     rstartsj = *startsj_r;
5684     bufa     = *bufa_ptr;
5685     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5686     b_otha   = b_oth->a;
5687   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5688 
5689   /* a-array */
5690   /*---------*/
5691   /*  post receives of a-array */
5692   for (i=0; i<nrecvs; i++) {
5693     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5694     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5695   }
5696 
5697   /* pack the outgoing message a-array */
5698   if (nsends) k = sstarts[0];
5699   for (i=0; i<nsends; i++) {
5700     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5701     bufA  = bufa+sstartsj[i];
5702     for (j=0; j<nrows; j++) {
5703       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5704       for (ll=0; ll<sbs; ll++) {
5705         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5706         for (l=0; l<ncols; l++) {
5707           *bufA++ = vals[l];
5708         }
5709         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5710       }
5711     }
5712     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5713   }
5714   /* recvs and sends of a-array are completed */
5715   i = nrecvs;
5716   while (i--) {
5717     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5718   }
5719   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5720   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5721 
5722   if (scall == MAT_INITIAL_MATRIX) {
5723     /* put together the new matrix */
5724     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5725 
5726     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5727     /* Since these are PETSc arrays, change flags to free them as necessary. */
5728     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5729     b_oth->free_a  = PETSC_TRUE;
5730     b_oth->free_ij = PETSC_TRUE;
5731     b_oth->nonew   = 0;
5732 
5733     ierr = PetscFree(bufj);CHKERRQ(ierr);
5734     if (!startsj_s || !bufa_ptr) {
5735       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5736       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5737     } else {
5738       *startsj_s = sstartsj;
5739       *startsj_r = rstartsj;
5740       *bufa_ptr  = bufa;
5741     }
5742   }
5743 
5744   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5745   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5746   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5747   PetscFunctionReturn(0);
5748 }
5749 
5750 /*@C
5751   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5752 
5753   Not Collective
5754 
5755   Input Parameters:
5756 . A - The matrix in mpiaij format
5757 
5758   Output Parameter:
5759 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5760 . colmap - A map from global column index to local index into lvec
5761 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5762 
5763   Level: developer
5764 
5765 @*/
5766 #if defined(PETSC_USE_CTABLE)
5767 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5768 #else
5769 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5770 #endif
5771 {
5772   Mat_MPIAIJ *a;
5773 
5774   PetscFunctionBegin;
5775   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5776   PetscValidPointer(lvec, 2);
5777   PetscValidPointer(colmap, 3);
5778   PetscValidPointer(multScatter, 4);
5779   a = (Mat_MPIAIJ*) A->data;
5780   if (lvec) *lvec = a->lvec;
5781   if (colmap) *colmap = a->colmap;
5782   if (multScatter) *multScatter = a->Mvctx;
5783   PetscFunctionReturn(0);
5784 }
5785 
5786 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5787 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5788 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5789 #if defined(PETSC_HAVE_MKL_SPARSE)
5790 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5791 #endif
5792 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5793 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5794 #if defined(PETSC_HAVE_ELEMENTAL)
5795 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5796 #endif
5797 #if defined(PETSC_HAVE_SCALAPACK)
5798 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5799 #endif
5800 #if defined(PETSC_HAVE_HYPRE)
5801 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5802 #endif
5803 #if defined(PETSC_HAVE_CUDA)
5804 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5805 #endif
5806 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5807 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5808 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5809 
5810 /*
5811     Computes (B'*A')' since computing B*A directly is untenable
5812 
5813                n                       p                          p
5814         [             ]       [             ]         [                 ]
5815       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5816         [             ]       [             ]         [                 ]
5817 
5818 */
5819 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5820 {
5821   PetscErrorCode ierr;
5822   Mat            At,Bt,Ct;
5823 
5824   PetscFunctionBegin;
5825   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5826   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5827   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5828   ierr = MatDestroy(&At);CHKERRQ(ierr);
5829   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5830   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5831   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5832   PetscFunctionReturn(0);
5833 }
5834 
5835 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5836 {
5837   PetscErrorCode ierr;
5838   PetscBool      cisdense;
5839 
5840   PetscFunctionBegin;
5841   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5842   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5843   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5844   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5845   if (!cisdense) {
5846     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5847   }
5848   ierr = MatSetUp(C);CHKERRQ(ierr);
5849 
5850   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5851   PetscFunctionReturn(0);
5852 }
5853 
5854 /* ----------------------------------------------------------------*/
5855 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5856 {
5857   Mat_Product *product = C->product;
5858   Mat         A = product->A,B=product->B;
5859 
5860   PetscFunctionBegin;
5861   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5862     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5863 
5864   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5865   C->ops->productsymbolic = MatProductSymbolic_AB;
5866   PetscFunctionReturn(0);
5867 }
5868 
5869 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5870 {
5871   PetscErrorCode ierr;
5872   Mat_Product    *product = C->product;
5873 
5874   PetscFunctionBegin;
5875   if (product->type == MATPRODUCT_AB) {
5876     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5877   }
5878   PetscFunctionReturn(0);
5879 }
5880 /* ----------------------------------------------------------------*/
5881 
5882 /*MC
5883    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5884 
5885    Options Database Keys:
5886 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5887 
5888    Level: beginner
5889 
5890    Notes:
5891     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
5892     in this case the values associated with the rows and columns one passes in are set to zero
5893     in the matrix
5894 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
5897 
5898 .seealso: MatCreateAIJ()
5899 M*/
5900 
5901 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5902 {
5903   Mat_MPIAIJ     *b;
5904   PetscErrorCode ierr;
5905   PetscMPIInt    size;
5906 
5907   PetscFunctionBegin;
5908   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5909 
5910   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5911   B->data       = (void*)b;
5912   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5913   B->assembled  = PETSC_FALSE;
5914   B->insertmode = NOT_SET_VALUES;
5915   b->size       = size;
5916 
5917   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5918 
5919   /* build cache for off array entries formed */
5920   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5921 
5922   b->donotstash  = PETSC_FALSE;
5923   b->colmap      = NULL;
5924   b->garray      = NULL;
5925   b->roworiented = PETSC_TRUE;
5926 
5927   /* stuff used for matrix vector multiply */
5928   b->lvec  = NULL;
5929   b->Mvctx = NULL;
5930 
5931   /* stuff for MatGetRow() */
5932   b->rowindices   = NULL;
5933   b->rowvalues    = NULL;
5934   b->getrowactive = PETSC_FALSE;
5935 
5936   /* flexible pointer used in CUSP/CUSPARSE classes */
5937   b->spptr = NULL;
5938 
5939   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5940   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5941   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5942   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5943   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5944   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5945   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5946   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5947   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5948   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5949 #if defined(PETSC_HAVE_MKL_SPARSE)
5950   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5951 #endif
5952   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5953   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5954   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5955 #if defined(PETSC_HAVE_ELEMENTAL)
5956   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5957 #endif
5958 #if defined(PETSC_HAVE_SCALAPACK)
5959   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
5960 #endif
5961   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5962   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5963 #if defined(PETSC_HAVE_HYPRE)
5964   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5965   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5966 #endif
5967   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5968   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5969   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5970   PetscFunctionReturn(0);
5971 }
5972 
5973 /*@C
5974      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5975          and "off-diagonal" part of the matrix in CSR format.
5976 
5977    Collective
5978 
5979    Input Parameters:
5980 +  comm - MPI communicator
5981 .  m - number of local rows (Cannot be PETSC_DECIDE)
5982 .  n - This value should be the same as the local size used in creating the
5983        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5984        calculated if N is given) For square matrices n is almost always m.
5985 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5986 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5987 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5988 .   j - column indices
5989 .   a - matrix values
5990 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5991 .   oj - column indices
5992 -   oa - matrix values
5993 
5994    Output Parameter:
5995 .   mat - the matrix
5996 
5997    Level: advanced
5998 
5999    Notes:
6000        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6001        must free the arrays once the matrix has been destroyed and not before.
6002 
6003        The i and j indices are 0 based
6004 
6005        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6006 
6007        This sets local rows and cannot be used to set off-processor values.
6008 
6009        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6010        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6011        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6012        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6013        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6014        communication if it is known that only local entries will be set.
6015 
6016 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6017           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6018 @*/
6019 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6020 {
6021   PetscErrorCode ierr;
6022   Mat_MPIAIJ     *maij;
6023 
6024   PetscFunctionBegin;
6025   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6026   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6027   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6028   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6029   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6030   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6031   maij = (Mat_MPIAIJ*) (*mat)->data;
6032 
6033   (*mat)->preallocated = PETSC_TRUE;
6034 
6035   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6036   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6037 
6038   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6039   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6040 
6041   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6042   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6043   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6044   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6045 
6046   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6047   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6048   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6049   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6050   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6051   PetscFunctionReturn(0);
6052 }
6053 
6054 /*
6055     Special version for direct calls from Fortran
6056 */
6057 #include <petsc/private/fortranimpl.h>
6058 
6059 /* Change these macros so can be used in void function */
6060 #undef CHKERRQ
6061 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6062 #undef SETERRQ2
6063 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6064 #undef SETERRQ3
6065 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6066 #undef SETERRQ
6067 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6068 
6069 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6070 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6071 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6072 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6073 #else
6074 #endif
6075 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6076 {
6077   Mat            mat  = *mmat;
6078   PetscInt       m    = *mm, n = *mn;
6079   InsertMode     addv = *maddv;
6080   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6081   PetscScalar    value;
6082   PetscErrorCode ierr;
6083 
6084   MatCheckPreallocated(mat,1);
6085   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6086   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6087   {
6088     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6089     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6090     PetscBool roworiented = aij->roworiented;
6091 
6092     /* Some Variables required in the macro */
6093     Mat        A                    = aij->A;
6094     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6095     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6096     MatScalar  *aa                  = a->a;
6097     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6098     Mat        B                    = aij->B;
6099     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6100     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6101     MatScalar  *ba                  = b->a;
6102     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6103      * cannot use "#if defined" inside a macro. */
6104     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6105 
6106     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6107     PetscInt  nonew = a->nonew;
6108     MatScalar *ap1,*ap2;
6109 
6110     PetscFunctionBegin;
6111     for (i=0; i<m; i++) {
6112       if (im[i] < 0) continue;
6113       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6114       if (im[i] >= rstart && im[i] < rend) {
6115         row      = im[i] - rstart;
6116         lastcol1 = -1;
6117         rp1      = aj + ai[row];
6118         ap1      = aa + ai[row];
6119         rmax1    = aimax[row];
6120         nrow1    = ailen[row];
6121         low1     = 0;
6122         high1    = nrow1;
6123         lastcol2 = -1;
6124         rp2      = bj + bi[row];
6125         ap2      = ba + bi[row];
6126         rmax2    = bimax[row];
6127         nrow2    = bilen[row];
6128         low2     = 0;
6129         high2    = nrow2;
6130 
6131         for (j=0; j<n; j++) {
6132           if (roworiented) value = v[i*n+j];
6133           else value = v[i+j*m];
6134           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6135           if (in[j] >= cstart && in[j] < cend) {
6136             col = in[j] - cstart;
6137             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6138 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6139             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6140 #endif
6141           } else if (in[j] < 0) continue;
6142           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6143             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6144             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6145           } else {
6146             if (mat->was_assembled) {
6147               if (!aij->colmap) {
6148                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6149               }
6150 #if defined(PETSC_USE_CTABLE)
6151               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6152               col--;
6153 #else
6154               col = aij->colmap[in[j]] - 1;
6155 #endif
6156               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6157                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6158                 col  =  in[j];
6159                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6160                 B        = aij->B;
6161                 b        = (Mat_SeqAIJ*)B->data;
6162                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6163                 rp2      = bj + bi[row];
6164                 ap2      = ba + bi[row];
6165                 rmax2    = bimax[row];
6166                 nrow2    = bilen[row];
6167                 low2     = 0;
6168                 high2    = nrow2;
6169                 bm       = aij->B->rmap->n;
6170                 ba       = b->a;
6171                 inserted = PETSC_FALSE;
6172               }
6173             } else col = in[j];
6174             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6175 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6176             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6177 #endif
6178           }
6179         }
6180       } else if (!aij->donotstash) {
6181         if (roworiented) {
6182           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6183         } else {
6184           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6185         }
6186       }
6187     }
6188   }
6189   PetscFunctionReturnVoid();
6190 }
6191