#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
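
   For example, the following sketch sets up an AIJ matrix that works on any
   number of processes ('comm', the sizes, and the preallocation counts nz,
   d_nz, and o_nz are placeholders; error checking omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,nz,NULL);              /* used on one process */
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  /* used on more than one process */
.ve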

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough rows with identical nonzero
    structure exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
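
   For example, an assembled AIJ matrix can be switched to this format in
   place (a minimal sketch):
.vb
   MatConvert(A,MATAIJCRL,MAT_INPLACE_MATRIX,&A);
.ve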

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->pinnedtocpu = flg;
#endif
  if (a->A) {
    ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.

    Used by a preconditioner, hence PETSC_EXTERN.
*/
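
/*
   Sketch of a typical call (names are illustrative): 'gmat' is a square
   MATSEQAIJ matrix whose copy on process 0 supplies the entries, and 'm' is
   the number of rows this process is to own in the result:

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
*/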
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over the numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
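
/*
   Illustration with made-up numbers: if garray = {3,8,12}, the loop below
   stores colmap[8] = 2, i.e. local index 1 shifted up by one so that 0 can
   mean "column not present"; a caller that looks up global column 8 gets 2
   back and subtracts 1 to recover the local off-diagonal column 1.
*/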
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

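/*
   The two macros below insert (row,col,value) into the diagonal (A) and
   off-diagonal (B) SeqAIJ blocks respectively: a binary search narrows the
   column range, a short linear scan finds the exact slot, and if the column
   is not yet present the row is grown (reallocating if necessary) and the
   later entries are shifted up to make room.
*/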
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
  { \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Unclear whether PetscLogFlops() slows down the code */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else              low2  = t;                        \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else ap2[_i] = value;                             \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* this code only works for square matrices A; v holds the full local row in
     global column order: the entries left of the diagonal block, the diagonal
     block itself, then the entries right of the diagonal block */

  /* find the number of entries in the row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
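
/*
   Illustration with made-up numbers: with cstart = 4 and cend = 8, a local
   row whose sorted global columns are {1, 5, 9} puts column 5 into the
   diagonal block as local column 1 (= 5 - cstart), while columns 1 and 9
   stay global in the off-diagonal block; ilen becomes 1 for A and 2 for B
   in that row.
*/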
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ     *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to change */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  /* overlap communication with computation: start gathering the needed off-process
     entries of xx, multiply by the local diagonal block meanwhile, then finish the
     scatter and add in the off-diagonal block contribution */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the off-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add the partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: are the diagonal blocks transposes of each other? */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: the off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the off-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add the partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1288 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1289 {
1290   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1291   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1292   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1293   PetscErrorCode ierr;
1294   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1295   int            fd;
1296   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1297   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1298   PetscScalar    *column_values;
1299   PetscInt       message_count,flowcontrolcount;
1300   FILE           *file;
1301 
1302   PetscFunctionBegin;
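       /* Binary layout written by this routine, in order:
            header[4]       = {MAT_FILE_CLASSID, M, N, total number of nonzeros}
            row lengths     - one PetscInt per global row
            column indices  - global indices, one per nonzero, rows in order
            values          - one PetscScalar per nonzero, in the same ordering
          Rank 0 performs all the writes; the other ranks ship their pieces to it
          under flow control so only bounded buffers are in flight at any time. */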
1303   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1305   nz   = A->nz + B->nz;
1306   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1307   if (!rank) {
1308     header[0] = MAT_FILE_CLASSID;
1309     header[1] = mat->rmap->N;
1310     header[2] = mat->cmap->N;
1311 
1312     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1313     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1314     /* get largest number of rows any processor has */
1315     rlen  = mat->rmap->n;
1316     range = mat->rmap->range;
1317     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1318   } else {
1319     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320     rlen = mat->rmap->n;
1321   }
1322 
1323   /* load up the local row counts */
1324   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1325   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1326 
1327   /* store the row lengths to the file */
1328   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1329   if (!rank) {
1330     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1331     for (i=1; i<size; i++) {
1332       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1333       rlen = range[i+1] - range[i];
1334       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1335       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     }
1337     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1338   } else {
1339     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1340     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1342   }
1343   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1344 
1345   /* load up the local column indices */
1346   nzmax = nz; /* rank 0 must be able to hold the largest processor's portion; elsewhere nz itself suffices */
1347   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1348   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1349   cnt   = 0;
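       /* Each row is emitted with globally sorted column indices by merging three
          already-sorted pieces: off-diagonal entries with global column < cstart,
          then the diagonal block shifted by cstart, then the remaining off-diagonal
          entries; garray[] maps local off-diagonal column indices to global ones. */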
1350   for (i=0; i<mat->rmap->n; i++) {
1351     for (j=B->i[i]; j<B->i[i+1]; j++) {
1352       if ((col = garray[B->j[j]]) > cstart) break;
1353       column_indices[cnt++] = col;
1354     }
1355     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1356     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1357   }
1358   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1359 
1360   /* store the column indices to the file */
1361   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1362   if (!rank) {
1363     MPI_Status status;
1364     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1365     for (i=1; i<size; i++) {
1366       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1367       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1368       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1369       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1370       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1371     }
1372     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1373   } else {
1374     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1375     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1376     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1378   }
1379   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1380 
1381   /* load up the local column values */
1382   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1383   cnt  = 0;
1384   for (i=0; i<mat->rmap->n; i++) {
1385     for (j=B->i[i]; j<B->i[i+1]; j++) {
1386       if (garray[B->j[j]] > cstart) break;
1387       column_values[cnt++] = B->a[j];
1388     }
1389     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1390     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1391   }
1392   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1393 
1394   /* store the column values to the file */
1395   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1396   if (!rank) {
1397     MPI_Status status;
1398     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1399     for (i=1; i<size; i++) {
1400       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1401       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1402       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1403       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1404       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1405     }
1406     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1407   } else {
1408     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1409     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1411     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1412   }
1413   ierr = PetscFree(column_values);CHKERRQ(ierr);
1414 
1415   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1416   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #include <petscdraw.h>
1421 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1422 {
1423   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1424   PetscErrorCode    ierr;
1425   PetscMPIInt       rank = aij->rank,size = aij->size;
1426   PetscBool         isdraw,iascii,isbinary;
1427   PetscViewer       sviewer;
1428   PetscViewerFormat format;
1429 
1430   PetscFunctionBegin;
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1434   if (iascii) {
1435     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1436     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1437       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1438       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1439       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1440       for (i=0; i<(PetscInt)size; i++) {
1441         nmax = PetscMax(nmax,nz[i]);
1442         nmin = PetscMin(nmin,nz[i]);
1443         navg += nz[i];
1444       }
1445       ierr = PetscFree(nz);CHKERRQ(ierr);
1446       navg = navg/size;
1447       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1448       PetscFunctionReturn(0);
1449     }
1450     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1451     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1452       MatInfo   info;
1453       PetscInt  *inodes;
1454 
1455       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1456       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1459       if (!inodes) {
1460         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1461                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1462       } else {
1463         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1464                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1465       }
1466       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1468       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1469       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1470       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1473       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1474       PetscFunctionReturn(0);
1475     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1476       PetscInt inodecount,inodelimit,*inodes;
1477       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1478       if (inodes) {
1479         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1480       } else {
1481         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1482       }
1483       PetscFunctionReturn(0);
1484     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1485       PetscFunctionReturn(0);
1486     }
1487   } else if (isbinary) {
1488     if (size == 1) {
1489       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1490       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1491     } else {
1492       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1493     }
1494     PetscFunctionReturn(0);
1495   } else if (iascii && size == 1) {
1496     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1497     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1498     PetscFunctionReturn(0);
1499   } else if (isdraw) {
1500     PetscDraw draw;
1501     PetscBool isnull;
1502     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1503     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1504     if (isnull) PetscFunctionReturn(0);
1505   }
1506 
1507   { /* assemble the entire matrix onto first processor */
1508     Mat A = NULL, Av;
1509     IS  isrow,iscol;
1510 
1511     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1512     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1513     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1514     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1515 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1516 /*
1517     Mat *AA, A = NULL, Av;
1518     IS  isrow,iscol;
1519 
1520     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1521     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1522     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1523     if (!rank) {
1524        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1525        A    = AA[0];
1526        Av   = AA[0];
1527     }
1528     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1529 */
1530     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1531     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1532     /*
1533        Everyone has to call to draw the matrix since the graphics waits are
1534        synchronized across all processors that share the PetscDraw object
1535     */
1536     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1537     if (!rank) {
1538       if (((PetscObject)mat)->name) {
1539         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1540       }
1541       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1542     }
1543     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1544     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1545     ierr = MatDestroy(&A);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1551 {
1552   PetscErrorCode ierr;
1553   PetscBool      iascii,isdraw,issocket,isbinary;
1554 
1555   PetscFunctionBegin;
1556   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1557   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1559   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1560   if (iascii || isdraw || isbinary || issocket) {
1561     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1567 {
1568   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1569   PetscErrorCode ierr;
1570   Vec            bb1 = 0;
1571   PetscBool      hasop;
1572 
1573   PetscFunctionBegin;
1574   if (flag == SOR_APPLY_UPPER) {
1575     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1576     PetscFunctionReturn(0);
1577   }
1578 
1579   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1580     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1581   }
1582 
1583   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1584     if (flag & SOR_ZERO_INITIAL_GUESS) {
1585       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1586       its--;
1587     }
1588 
1589     while (its--) {
1590       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592 
1593       /* update rhs: bb1 = bb - B*x */
1594       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1595       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1596 
1597       /* local sweep */
1598       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1599     }
1600   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1601     if (flag & SOR_ZERO_INITIAL_GUESS) {
1602       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1603       its--;
1604     }
1605     while (its--) {
1606       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608 
1609       /* update rhs: bb1 = bb - B*x */
1610       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1611       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1612 
1613       /* local sweep */
1614       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1615     }
1616   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1617     if (flag & SOR_ZERO_INITIAL_GUESS) {
1618       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1619       its--;
1620     }
1621     while (its--) {
1622       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624 
1625       /* update rhs: bb1 = bb - B*x */
1626       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1627       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1628 
1629       /* local sweep */
1630       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1631     }
1632   } else if (flag & SOR_EISENSTAT) {
1633     Vec xx1;
1634 
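         /* Eisenstat's trick: a single backward local sweep produces xx; the
            right-hand side is then modified using the diagonal and the off-process
            coupling (bb1 = ((omega-2)/omega)*D*xx + bb + B*lvec below), and a forward
            local sweep produces the correction xx1 added to xx, applying SSOR-like
            preconditioning at roughly the cost of one sweep. */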
1635     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1636     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1637 
1638     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1639     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640     if (!mat->diag) {
1641       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1642       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1643     }
1644     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1645     if (hasop) {
1646       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1647     } else {
1648       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1649     }
1650     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1651 
1652     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1653 
1654     /* local sweep */
1655     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1656     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1657     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1658   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1659 
1660   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1661 
1662   matin->factorerrortype = mat->A->factorerrortype;
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1667 {
1668   Mat            aA,aB,Aperm;
1669   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1670   PetscScalar    *aa,*ba;
1671   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1672   PetscSF        rowsf,sf;
1673   IS             parcolp = NULL;
1674   PetscBool      done;
1675   PetscErrorCode ierr;
1676 
1677   PetscFunctionBegin;
1678   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1679   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1680   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1681   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1682 
1683   /* Invert row permutation to find out where my rows should go */
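       /* Permutation inversion via PetscSF: leaf i points at root rwant[i] (the old
          row that new row i pulls from), and the reduce deposits each leaf's own
          global index (work[]) at that root, so afterwards rdest[i] holds the
          destination row of local row i under the permutation. */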
1684   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1685   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1686   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1688   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1689   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1690 
1691   /* Invert column permutation to find out where my columns should go */
1692   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1693   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1694   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1695   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1696   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1697   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1698   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1699 
1700   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1701   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1702   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1703 
1704   /* Find out where my gcols should go */
1705   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1706   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1707   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1708   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1709   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1710   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1711   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1712   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1713 
1714   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1715   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1716   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1717   for (i=0; i<m; i++) {
1718     PetscInt row = rdest[i],rowner;
1719     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1720     for (j=ai[i]; j<ai[i+1]; j++) {
1721       PetscInt cowner,col = cdest[aj[j]];
1722       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1723       if (rowner == cowner) dnnz[i]++;
1724       else onnz[i]++;
1725     }
1726     for (j=bi[i]; j<bi[i+1]; j++) {
1727       PetscInt cowner,col = gcdest[bj[j]];
1728       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1729       if (rowner == cowner) dnnz[i]++;
1730       else onnz[i]++;
1731     }
1732   }
1733   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1734   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1735   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1736   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1737   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1738 
1739   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1740   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1741   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1742   for (i=0; i<m; i++) {
1743     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1744     PetscInt j0,rowlen;
1745     rowlen = ai[i+1] - ai[i];
1746     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches of at most m */
1747       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1748       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1749     }
1750     rowlen = bi[i+1] - bi[i];
1751     for (j0=j=0; j<rowlen; j0=j) {
1752       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1753       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1754     }
1755   }
1756   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1757   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1758   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1759   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1760   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1761   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1762   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1763   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1764   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1765   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1766   *B = Aperm;
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1771 {
1772   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1777   if (ghosts) *ghosts = aij->garray;
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1782 {
1783   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1784   Mat            A    = mat->A,B = mat->B;
1785   PetscErrorCode ierr;
1786   PetscLogDouble isend[5],irecv[5];
1787 
1788   PetscFunctionBegin;
1789   info->block_size = 1.0;
1790   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1791 
1792   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1793   isend[3] = info->memory;  isend[4] = info->mallocs;
1794 
1795   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1796 
1797   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1798   isend[3] += info->memory;  isend[4] += info->mallocs;
1799   if (flag == MAT_LOCAL) {
1800     info->nz_used      = isend[0];
1801     info->nz_allocated = isend[1];
1802     info->nz_unneeded  = isend[2];
1803     info->memory       = isend[3];
1804     info->mallocs      = isend[4];
1805   } else if (flag == MAT_GLOBAL_MAX) {
1806     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1807 
1808     info->nz_used      = irecv[0];
1809     info->nz_allocated = irecv[1];
1810     info->nz_unneeded  = irecv[2];
1811     info->memory       = irecv[3];
1812     info->mallocs      = irecv[4];
1813   } else if (flag == MAT_GLOBAL_SUM) {
1814     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1815 
1816     info->nz_used      = irecv[0];
1817     info->nz_allocated = irecv[1];
1818     info->nz_unneeded  = irecv[2];
1819     info->memory       = irecv[3];
1820     info->mallocs      = irecv[4];
1821   }
1822   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1823   info->fill_ratio_needed = 0;
1824   info->factor_mallocs    = 0;
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1829 {
1830   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   switch (op) {
1835   case MAT_NEW_NONZERO_LOCATIONS:
1836   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1837   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1838   case MAT_KEEP_NONZERO_PATTERN:
1839   case MAT_NEW_NONZERO_LOCATION_ERR:
1840   case MAT_USE_INODES:
1841   case MAT_IGNORE_ZERO_ENTRIES:
1842     MatCheckPreallocated(A,1);
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1845     break;
1846   case MAT_ROW_ORIENTED:
1847     MatCheckPreallocated(A,1);
1848     a->roworiented = flg;
1849 
1850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1851     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1852     break;
1853   case MAT_NEW_DIAGONALS:
1854   case MAT_SORTED_FULL:
1855     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1856     break;
1857   case MAT_IGNORE_OFF_PROC_ENTRIES:
1858     a->donotstash = flg;
1859     break;
1860   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1861   case MAT_SPD:
1862   case MAT_SYMMETRIC:
1863   case MAT_STRUCTURALLY_SYMMETRIC:
1864   case MAT_HERMITIAN:
1865   case MAT_SYMMETRY_ETERNAL:
1866     break;
1867   case MAT_SUBMAT_SINGLEIS:
1868     A->submat_singleis = flg;
1869     break;
1870   case MAT_STRUCTURE_ONLY:
1871     /* The option is handled directly by MatSetOption() */
1872     break;
1873   default:
1874     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1875   }
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1880 {
1881   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1882   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1883   PetscErrorCode ierr;
1884   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1885   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1886   PetscInt       *cmap,*idx_p;
1887 
1888   PetscFunctionBegin;
1889   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1890   mat->getrowactive = PETSC_TRUE;
1891 
1892   if (!mat->rowvalues && (idx || v)) {
1893     /*
1894         allocate enough space to hold information from the longest row.
1895     */
1896     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1897     PetscInt   max = 1,tmp;
1898     for (i=0; i<matin->rmap->n; i++) {
1899       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1900       if (max < tmp) max = tmp;
1901     }
1902     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1903   }
1904 
1905   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1906   lrow = row - rstart;
1907 
1908   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1909   if (!v)   {pvA = 0; pvB = 0;}
1910   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1911   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1912   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1913   nztot = nzA + nzB;
1914 
1915   cmap = mat->garray;
1916   if (v  || idx) {
1917     if (nztot) {
1918       /* Sort by increasing column numbers, assuming A and B already sorted */
1919       PetscInt imark = -1;
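           /* imark = number of leading B (off-diagonal) entries whose global column
              precedes the diagonal block; the merged row is B(<cstart), then all of
              A shifted by cstart, then the remaining entries of B */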
1920       if (v) {
1921         *v = v_p = mat->rowvalues;
1922         for (i=0; i<nzB; i++) {
1923           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1924           else break;
1925         }
1926         imark = i;
1927         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1928         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1929       }
1930       if (idx) {
1931         *idx = idx_p = mat->rowindices;
1932         if (imark > -1) {
1933           for (i=0; i<imark; i++) {
1934             idx_p[i] = cmap[cworkB[i]];
1935           }
1936         } else {
1937           for (i=0; i<nzB; i++) {
1938             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1939             else break;
1940           }
1941           imark = i;
1942         }
1943         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1944         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1945       }
1946     } else {
1947       if (idx) *idx = 0;
1948       if (v)   *v   = 0;
1949     }
1950   }
1951   *nz  = nztot;
1952   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1953   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1958 {
1959   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1960 
1961   PetscFunctionBegin;
1962   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1963   aij->getrowactive = PETSC_FALSE;
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1968 {
1969   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1970   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1971   PetscErrorCode ierr;
1972   PetscInt       i,j,cstart = mat->cmap->rstart;
1973   PetscReal      sum = 0.0;
1974   MatScalar      *v;
1975 
1976   PetscFunctionBegin;
1977   if (aij->size == 1) {
1978     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1979   } else {
1980     if (type == NORM_FROBENIUS) {
1981       v = amat->a;
1982       for (i=0; i<amat->nz; i++) {
1983         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1984       }
1985       v = bmat->a;
1986       for (i=0; i<bmat->nz; i++) {
1987         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1988       }
1989       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1990       *norm = PetscSqrtReal(*norm);
1991       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1992     } else if (type == NORM_1) { /* max column norm */
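           /* Column sums are accumulated into a dense array spanning the full global
              column dimension (O(N) storage per process) and combined with a single
              allreduce; the 1-norm is the largest combined column sum. */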
1993       PetscReal *tmp,*tmp2;
1994       PetscInt  *jj,*garray = aij->garray;
1995       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1996       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1997       *norm = 0.0;
1998       v     = amat->a; jj = amat->j;
1999       for (j=0; j<amat->nz; j++) {
2000         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2001       }
2002       v = bmat->a; jj = bmat->j;
2003       for (j=0; j<bmat->nz; j++) {
2004         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2005       }
2006       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2007       for (j=0; j<mat->cmap->N; j++) {
2008         if (tmp2[j] > *norm) *norm = tmp2[j];
2009       }
2010       ierr = PetscFree(tmp);CHKERRQ(ierr);
2011       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2012       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2013     } else if (type == NORM_INFINITY) { /* max row norm */
2014       PetscReal ntemp = 0.0;
2015       for (j=0; j<aij->A->rmap->n; j++) {
2016         v   = amat->a + amat->i[j];
2017         sum = 0.0;
2018         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2019           sum += PetscAbsScalar(*v); v++;
2020         }
2021         v = bmat->a + bmat->i[j];
2022         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2023           sum += PetscAbsScalar(*v); v++;
2024         }
2025         if (sum > ntemp) ntemp = sum;
2026       }
2027       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2028       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2029     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2030   }
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2035 {
2036   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2037   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2038   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2039   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2040   PetscErrorCode  ierr;
2041   Mat             B,A_diag,*B_diag;
2042   const MatScalar *array;
2043 
2044   PetscFunctionBegin;
2045   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2046   ai = Aloc->i; aj = Aloc->j;
2047   bi = Bloc->i; bj = Bloc->j;
2048   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2049     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2050     PetscSFNode          *oloc;
2051     PETSC_UNUSED PetscSF sf;
2052 
2053     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2054     /* compute d_nnz for preallocation */
2055     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2056     for (i=0; i<ai[ma]; i++) {
2057       d_nnz[aj[i]]++;
2058     }
2059     /* compute local off-diagonal contributions */
2060     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2061     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2062     /* map those to global */
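         /* Each leaf carries the count of local off-diagonal entries in one global
            column of A; summing the leaves onto the owning process yields, for each
            future row of the transpose, the number of entries that will arrive from
            off-process rows, i.e. the off-diagonal preallocation o_nnz. */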
2063     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2064     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2065     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2066     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2067     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2068     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2069     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2070 
2071     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2072     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2073     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2074     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2075     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2076     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2077   } else {
2078     B    = *matout;
2079     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2080   }
2081 
2082   b           = (Mat_MPIAIJ*)B->data;
2083   A_diag      = a->A;
2084   B_diag      = &b->A;
2085   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2086   A_diag_ncol = A_diag->cmap->N;
2087   B_diag_ilen = sub_B_diag->ilen;
2088   B_diag_i    = sub_B_diag->i;
2089 
2090   /* Set ilen for diagonal of B */
2091   for (i=0; i<A_diag_ncol; i++) {
2092     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2093   }
2094 
2095   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2096   very quickly (i.e., without using MatSetValues()) because all writes are local. */
2097   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2098 
2099   /* copy over the B part */
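       /* Row i of the off-diagonal part becomes column (rstart+i) of B: MatSetValues()
          is called with ncol row indices and a single column index, which writes the
          transposed entries directly. */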
2100   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2101   array = Bloc->a;
2102   row   = A->rmap->rstart;
2103   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2104   cols_tmp = cols;
2105   for (i=0; i<mb; i++) {
2106     ncol = bi[i+1]-bi[i];
2107     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2108     row++;
2109     array += ncol; cols_tmp += ncol;
2110   }
2111   ierr = PetscFree(cols);CHKERRQ(ierr);
2112 
2113   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2115   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2116     *matout = B;
2117   } else {
2118     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2124 {
2125   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2126   Mat            a    = aij->A,b = aij->B;
2127   PetscErrorCode ierr;
2128   PetscInt       s1,s2,s3;
2129 
2130   PetscFunctionBegin;
2131   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2132   if (rr) {
2133     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2134     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2135     /* Overlap communication with computation. */
2136     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2137   }
2138   if (ll) {
2139     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2140     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2141     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2142   }
2143   /* scale the diagonal block */
2144   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2145 
2146   if (rr) {
2147     /* Do a scatter end and then right scale the off-diagonal block */
2148     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2149     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2150   }
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2155 {
2156   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2165 {
2166   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2167   Mat            a,b,c,d;
2168   PetscBool      flg;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   a = matA->A; b = matA->B;
2173   c = matB->A; d = matB->B;
2174 
2175   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2176   if (flg) {
2177     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2178   }
2179   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2184 {
2185   PetscErrorCode ierr;
2186   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2187   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2188 
2189   PetscFunctionBegin;
2190   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2191   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2192     /* Because of the column compression in the off-process part of the matrix a->B,
2193        the number of columns in a->B and b->B may differ, so we cannot call
2194        MatCopy() directly on the two parts. If need be, a copy more efficient than
2195        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2196        and then copying the submatrices. */
2197     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2198   } else {
2199     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2200     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2201   }
2202   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2207 {
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 /*
2216    Computes the number of nonzeros per row needed for preallocation when X and Y
2217    have different nonzero structure.
2218 */
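     /*
        Illustration (not from the code): with global column sets X_row = {0,3,7} and
        Y_row = {1,3,8}, the two-pointer merge below counts 0,1,3,7,8 -> nnz = 5; the
        shared column 3 is counted only once because the "Skip duplicate" branch
        advances k without counting.
     */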
2219 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2220 {
2221   PetscInt       i,j,k,nzx,nzy;
2222 
2223   PetscFunctionBegin;
2224   /* Set the number of nonzeros in the new matrix */
2225   for (i=0; i<m; i++) {
2226     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2227     nzx = xi[i+1] - xi[i];
2228     nzy = yi[i+1] - yi[i];
2229     nnz[i] = 0;
2230     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2231       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2232       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2233       nnz[i]++;
2234     }
2235     for (; k<nzy; k++) nnz[i]++;
2236   }
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2241 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2242 {
2243   PetscErrorCode ierr;
2244   PetscInt       m = Y->rmap->N;
2245   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2246   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2247 
2248   PetscFunctionBegin;
2249   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2254 {
2255   PetscErrorCode ierr;
2256   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2257   PetscBLASInt   bnz,one=1;
2258   Mat_SeqAIJ     *x,*y;
2259 
2260   PetscFunctionBegin;
2261   if (str == SAME_NONZERO_PATTERN) {
2262     PetscScalar alpha = a;
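         /* With identical nonzero patterns the value arrays of X and Y line up
            entry-for-entry, so Y += a*X reduces to a dense BLAS axpy on each of the
            diagonal (A) and off-diagonal (B) parts. */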
2263     x    = (Mat_SeqAIJ*)xx->A->data;
2264     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2265     y    = (Mat_SeqAIJ*)yy->A->data;
2266     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2267     x    = (Mat_SeqAIJ*)xx->B->data;
2268     y    = (Mat_SeqAIJ*)yy->B->data;
2269     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2270     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2271     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2272     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/End(), which updates the matrix on the GPU;
2273        here the values were modified directly, so mark the CPU copy as the current one */
2274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2275     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2276       Y->offloadmask = PETSC_OFFLOAD_CPU;
2277     }
2278 #endif
2279   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2280     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2281   } else {
2282     Mat      B;
2283     PetscInt *nnz_d,*nnz_o;
2284     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2285     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2286     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2287     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2288     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2289     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2290     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2291     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2292     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2293     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2294     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2295     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2296     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2297     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2298   }
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2303 
2304 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2305 {
2306 #if defined(PETSC_USE_COMPLEX)
2307   PetscErrorCode ierr;
2308   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2312   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2313 #else
2314   PetscFunctionBegin;
2315 #endif
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2320 {
2321   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2322   PetscErrorCode ierr;
2323 
2324   PetscFunctionBegin;
2325   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2326   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2331 {
2332   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2337   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2342 {
2343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2344   PetscErrorCode ierr;
2345   PetscInt       i,*idxb = 0;
2346   PetscScalar    *va,*vb;
2347   Vec            vtmp;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2351   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2352   if (idx) {
2353     for (i=0; i<A->rmap->n; i++) {
2354       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2355     }
2356   }
2357 
2358   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2359   if (idx) {
2360     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2361   }
2362   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2363   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2364 
2365   for (i=0; i<A->rmap->n; i++) {
2366     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2367       va[i] = vb[i];
2368       if (idx) idx[i] = a->garray[idxb[i]];
2369     }
2370   }
2371 
2372   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2374   ierr = PetscFree(idxb);CHKERRQ(ierr);
2375   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2380 {
2381   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2382   PetscErrorCode ierr;
2383   PetscInt       i,*idxb = 0;
2384   PetscScalar    *va,*vb;
2385   Vec            vtmp;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2389   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2390   if (idx) {
2391     for (i=0; i<A->rmap->n; i++) {
2392       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2393     }
2394   }
2395 
2396   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2397   if (idx) {
2398     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2399   }
2400   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2401   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2402 
2403   for (i=0; i<A->rmap->n; i++) {
2404     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2405       va[i] = vb[i];
2406       if (idx) idx[i] = a->garray[idxb[i]];
2407     }
2408   }
2409 
2410   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2411   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2412   ierr = PetscFree(idxb);CHKERRQ(ierr);
2413   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors, matching MatGetRowMax_MPIAIJ() */
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2457 {
2458   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2459   PetscInt       n      = A->rmap->n;
2460   PetscInt       cstart = A->cmap->rstart;
2461   PetscInt       *cmap  = mat->garray;
2462   PetscInt       *diagIdx, *offdiagIdx;
2463   Vec            diagV, offdiagV;
2464   PetscScalar    *a, *diagA, *offdiagA;
2465   PetscInt       r;
2466   PetscErrorCode ierr;
2467 
2468   PetscFunctionBegin;
2469   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2470   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2471   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2472   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2473   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2474   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2475   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2476   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2477   for (r = 0; r < n; ++r) {
2478     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2479       a[r]   = diagA[r];
2480       idx[r] = cstart + diagIdx[r];
2481     } else {
2482       a[r]   = offdiagA[r];
2483       idx[r] = cmap[offdiagIdx[r]];
2484     }
2485   }
2486   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2487   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2488   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2489   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2490   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2491   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2496 {
2497   PetscErrorCode ierr;
2498   Mat            *dummy;
2499 
2500   PetscFunctionBegin;
2501   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2502   *newmat = *dummy;
2503   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2508 {
2509   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2510   PetscErrorCode ierr;
2511 
2512   PetscFunctionBegin;
2513   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2514   A->factorerrortype = a->A->factorerrortype;
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2519 {
2520   PetscErrorCode ierr;
2521   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2522 
2523   PetscFunctionBegin;
2524   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2525   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2526   if (x->assembled) {
2527     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2528   } else {
2529     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2530   }
2531   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2532   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2537 {
2538   PetscFunctionBegin;
2539   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2540   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 /*@
2545    MatMPIAIJSetUseScalableIncreaseOverlap - Determines whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2546 
2547    Collective on Mat
2548 
2549    Input Parameters:
2550 +    A - the matrix
2551 -    sc - PETSC_TRUE to use the scalable algorithm (the default is PETSC_FALSE, the non-scalable algorithm)
2552 
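   Options Database Keys:
.    -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap (processed by MatSetFromOptions_MPIAIJ())

   A minimal usage sketch (assumes A is an assembled MATMPIAIJ):
.vb
      MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
.ve
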
2553    Level: advanced
2554 
2555 @*/
2556 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2557 {
2558   PetscErrorCode       ierr;
2559 
2560   PetscFunctionBegin;
2561   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2566 {
2567   PetscErrorCode       ierr;
2568   PetscBool            sc = PETSC_FALSE,flg;
2569 
2570   PetscFunctionBegin;
2571   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2572   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2573   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2574   if (flg) {
2575     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2576   }
2577   ierr = PetscOptionsTail();CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2582 {
2583   PetscErrorCode ierr;
2584   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2585   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2586 
2587   PetscFunctionBegin;
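  /* the shift only touches the diagonal, so one nonzero per local row suffices; if the diagonal
     block was preallocated but never filled, re-preallocate it while preserving its nonew flag */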
2588   if (!Y->preallocated) {
2589     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2590   } else if (!aij->nz) {
2591     PetscInt nonew = aij->nonew;
2592     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2593     aij->nonew = nonew;
2594   }
2595   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2600 {
2601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2602   PetscErrorCode ierr;
2603 
2604   PetscFunctionBegin;
2605   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2606   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2607   if (d) {
2608     PetscInt rstart;
2609     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2610     *d += rstart;
2611 
2612   }
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2617 {
2618   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2619   PetscErrorCode ierr;
2620 
2621   PetscFunctionBegin;
2622   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 /* -------------------------------------------------------------------*/
2627 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2628                                        MatGetRow_MPIAIJ,
2629                                        MatRestoreRow_MPIAIJ,
2630                                        MatMult_MPIAIJ,
2631                                 /* 4*/ MatMultAdd_MPIAIJ,
2632                                        MatMultTranspose_MPIAIJ,
2633                                        MatMultTransposeAdd_MPIAIJ,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*10*/ 0,
2638                                        0,
2639                                        0,
2640                                        MatSOR_MPIAIJ,
2641                                        MatTranspose_MPIAIJ,
2642                                 /*15*/ MatGetInfo_MPIAIJ,
2643                                        MatEqual_MPIAIJ,
2644                                        MatGetDiagonal_MPIAIJ,
2645                                        MatDiagonalScale_MPIAIJ,
2646                                        MatNorm_MPIAIJ,
2647                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2648                                        MatAssemblyEnd_MPIAIJ,
2649                                        MatSetOption_MPIAIJ,
2650                                        MatZeroEntries_MPIAIJ,
2651                                 /*24*/ MatZeroRows_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*29*/ MatSetUp_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        MatGetDiagonalBlock_MPIAIJ,
2660                                        0,
2661                                 /*34*/ MatDuplicate_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                 /*39*/ MatAXPY_MPIAIJ,
2667                                        MatCreateSubMatrices_MPIAIJ,
2668                                        MatIncreaseOverlap_MPIAIJ,
2669                                        MatGetValues_MPIAIJ,
2670                                        MatCopy_MPIAIJ,
2671                                 /*44*/ MatGetRowMax_MPIAIJ,
2672                                        MatScale_MPIAIJ,
2673                                        MatShift_MPIAIJ,
2674                                        MatDiagonalSet_MPIAIJ,
2675                                        MatZeroRowsColumns_MPIAIJ,
2676                                 /*49*/ MatSetRandom_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2682                                        0,
2683                                        MatSetUnfactored_MPIAIJ,
2684                                        MatPermute_MPIAIJ,
2685                                        0,
2686                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2687                                        MatDestroy_MPIAIJ,
2688                                        MatView_MPIAIJ,
2689                                        0,
2690                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2691                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2692                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2697                                        MatGetRowMinAbs_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                 /*75*/ MatFDColoringApply_AIJ,
2703                                        MatSetFromOptions_MPIAIJ,
2704                                        0,
2705                                        0,
2706                                        MatFindZeroDiagonals_MPIAIJ,
2707                                 /*80*/ 0,
2708                                        0,
2709                                        0,
2710                                 /*83*/ MatLoad_MPIAIJ,
2711                                        MatIsSymmetric_MPIAIJ,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2717                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2718                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2719                                        MatPtAP_MPIAIJ_MPIAIJ,
2720                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2721                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        MatPinToCPU_MPIAIJ,
2726                                 /*99*/ 0,
2727                                        0,
2728                                        0,
2729                                        MatConjugate_MPIAIJ,
2730                                        0,
2731                                 /*104*/MatSetValuesRow_MPIAIJ,
2732                                        MatRealPart_MPIAIJ,
2733                                        MatImaginaryPart_MPIAIJ,
2734                                        0,
2735                                        0,
2736                                 /*109*/0,
2737                                        0,
2738                                        MatGetRowMin_MPIAIJ,
2739                                        0,
2740                                        MatMissingDiagonal_MPIAIJ,
2741                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2742                                        0,
2743                                        MatGetGhosts_MPIAIJ,
2744                                        0,
2745                                        0,
2746                                 /*119*/0,
2747                                        0,
2748                                        0,
2749                                        0,
2750                                        MatGetMultiProcBlock_MPIAIJ,
2751                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2752                                        MatGetColumnNorms_MPIAIJ,
2753                                        MatInvertBlockDiagonal_MPIAIJ,
2754                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2755                                        MatCreateSubMatricesMPI_MPIAIJ,
2756                                 /*129*/0,
2757                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2758                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2759                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2760                                        0,
2761                                 /*134*/0,
2762                                        0,
2763                                        MatRARt_MPIAIJ_MPIAIJ,
2764                                        0,
2765                                        0,
2766                                 /*139*/MatSetBlockSizes_MPIAIJ,
2767                                        0,
2768                                        0,
2769                                        MatFDColoringSetUp_MPIXAIJ,
2770                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2771                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2772 };
2773 
2774 /* ----------------------------------------------------------------------------------------*/
2775 
2776 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2777 {
2778   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2783   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2788 {
2789   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2790   PetscErrorCode ierr;
2791 
2792   PetscFunctionBegin;
2793   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2794   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2799 {
2800   Mat_MPIAIJ     *b;
2801   PetscErrorCode ierr;
2802   PetscMPIInt    size;
2803 
2804   PetscFunctionBegin;
2805   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2807   b = (Mat_MPIAIJ*)B->data;
2808 
2809 #if defined(PETSC_USE_CTABLE)
2810   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2811 #else
2812   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2813 #endif
2814   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2815   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2816   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2817 
2818   /* Because the off-diagonal block B may have been resized by an earlier preallocation we simply destroy it and create a new one each time */
2819   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2820   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2821   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2822   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2823   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2824   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2825   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2826 
2827   if (!B->preallocated) {
2828     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2829     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2830     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2831     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2832     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2833   }
2834 
2835   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2836   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2837   B->preallocated  = PETSC_TRUE;
2838   B->was_assembled = PETSC_FALSE;
2839   B->assembled     = PETSC_FALSE;
2840   PetscFunctionReturn(0);
2841 }
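
/*
   A minimal usage sketch of the public MatMPIAIJSetPreallocation() interface implemented above
   (the communicator, sizes, and nnz counts are illustrative only):

      Mat      A;
      PetscInt d_nnz[2] = {2,1},o_nnz[2] = {1,0};    // two local rows

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,2,2,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  // the per-row arrays override the d_nz/o_nz scalars
*/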
2842 
2843 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2844 {
2845   Mat_MPIAIJ     *b;
2846   PetscErrorCode ierr;
2847 
2848   PetscFunctionBegin;
2849   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2850   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2851   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2852   b = (Mat_MPIAIJ*)B->data;
2853 
2854 #if defined(PETSC_USE_CTABLE)
2855   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2856 #else
2857   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2858 #endif
2859   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2860   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2861   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2862 
2863   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2864   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2865   B->preallocated  = PETSC_TRUE;
2866   B->was_assembled = PETSC_FALSE;
2867   B->assembled = PETSC_FALSE;
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2872 {
2873   Mat            mat;
2874   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2875   PetscErrorCode ierr;
2876 
2877   PetscFunctionBegin;
2878   *newmat = 0;
2879   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2880   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2881   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2882   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2883   a       = (Mat_MPIAIJ*)mat->data;
2884 
2885   mat->factortype   = matin->factortype;
2886   mat->assembled    = PETSC_TRUE;
2887   mat->insertmode   = NOT_SET_VALUES;
2888   mat->preallocated = PETSC_TRUE;
2889 
2890   a->size         = oldmat->size;
2891   a->rank         = oldmat->rank;
2892   a->donotstash   = oldmat->donotstash;
2893   a->roworiented  = oldmat->roworiented;
2894   a->rowindices   = 0;
2895   a->rowvalues    = 0;
2896   a->getrowactive = PETSC_FALSE;
2897 
2898   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2899   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2900 
2901   if (oldmat->colmap) {
2902 #if defined(PETSC_USE_CTABLE)
2903     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2904 #else
2905     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2906     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2907     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2908 #endif
2909   } else a->colmap = 0;
2910   if (oldmat->garray) {
2911     PetscInt len;
2912     len  = oldmat->B->cmap->n;
2913     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2914     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2915     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2916   } else a->garray = 0;
2917 
2918   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2919   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2920   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2921   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2922 
2923   if (oldmat->Mvctx_mpi1) {
2924     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2925     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2926   }
2927 
2928   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2929   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2930   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2931   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2932   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2933   *newmat = mat;
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2938 {
2939   PetscBool      isbinary, ishdf5;
2940   PetscErrorCode ierr;
2941 
2942   PetscFunctionBegin;
2943   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2944   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2945   /* force binary viewer to load .info file if it has not yet done so */
2946   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2948   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2949   if (isbinary) {
2950     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2951   } else if (ishdf5) {
2952 #if defined(PETSC_HAVE_HDF5)
2953     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2954 #else
2955     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2956 #endif
2957   } else {
2958     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2959   }
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2964 {
2965   PetscScalar    *vals,*svals;
2966   MPI_Comm       comm;
2967   PetscErrorCode ierr;
2968   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2969   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2970   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2971   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2972   PetscInt       cend,cstart,n,*rowners;
2973   int            fd;
2974   PetscInt       bs = newMat->rmap->bs;
2975 
2976   PetscFunctionBegin;
2977   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2978   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2979   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2980   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2981   if (!rank) {
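    /* binary header layout: header[0] = MAT_FILE_CLASSID, header[1] = global rows M,
       header[2] = global columns N, header[3] = total nonzeros (negative flags a special
       on-disk format that cannot be loaded as MATMPIAIJ) */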
2982     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2983     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2984     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2985   }
2986 
2987   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2988   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2989   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2990   if (bs < 0) bs = 1;
2991 
2992   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2993   M    = header[1]; N = header[2];
2994 
2995   /* If global sizes are set, check if they are consistent with that given in the file */
2996   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2997   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2998 
2999   /* determine ownership of all (block) rows */
3000   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3001   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3002   else m = newMat->rmap->n; /* Set by user */
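  /* e.g. (illustrative): M = 10, bs = 2, size = 3 gives 5 block rows split 2,2,1 among the ranks, i.e. m = 4,4,2 */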
3003 
3004   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3005   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3006 
3007   /* First process needs enough room for process with most rows */
3008   if (!rank) {
3009     mmax = rowners[1];
3010     for (i=2; i<=size; i++) {
3011       mmax = PetscMax(mmax, rowners[i]);
3012     }
3013   } else mmax = -1;             /* unused, but compilers complain */
3014 
3015   rowners[0] = 0;
3016   for (i=2; i<=size; i++) {
3017     rowners[i] += rowners[i-1];
3018   }
3019   rstart = rowners[rank];
3020   rend   = rowners[rank+1];
3021 
3022   /* distribute row lengths to all processors */
3023   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3024   if (!rank) {
3025     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3026     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3027     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3028     for (j=0; j<m; j++) {
3029       procsnz[0] += ourlens[j];
3030     }
3031     for (i=1; i<size; i++) {
3032       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3033       /* calculate the number of nonzeros on each processor */
3034       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3035         procsnz[i] += rowlengths[j];
3036       }
3037       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3038     }
3039     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3040   } else {
3041     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3042   }
3043 
3044   if (!rank) {
3045     /* determine max buffer needed and allocate it */
3046     maxnz = 0;
3047     for (i=0; i<size; i++) {
3048       maxnz = PetscMax(maxnz,procsnz[i]);
3049     }
3050     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3051 
3052     /* read in my part of the matrix column indices  */
3053     nz   = procsnz[0];
3054     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3055     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3056 
3057     /* read in everyone else's and ship them off */
3058     for (i=1; i<size; i++) {
3059       nz   = procsnz[i];
3060       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3061       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3062     }
3063     ierr = PetscFree(cols);CHKERRQ(ierr);
3064   } else {
3065     /* determine buffer space needed for message */
3066     nz = 0;
3067     for (i=0; i<m; i++) {
3068       nz += ourlens[i];
3069     }
3070     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3071 
3072     /* receive message of column indices */
3073     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3074   }
3075 
3076   /* determine column ownership if matrix is not square */
3077   if (N != M) {
3078     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3079     else n = newMat->cmap->n;
3080     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3081     cstart = cend - n;
3082   } else {
3083     cstart = rstart;
3084     cend   = rend;
3085     n      = cend - cstart;
3086   }
3087 
3088   /* loop over local rows, determining the number of off-diagonal entries */
3089   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3090   jj   = 0;
3091   for (i=0; i<m; i++) {
3092     for (j=0; j<ourlens[i]; j++) {
3093       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3094       jj++;
3095     }
3096   }
3097 
3098   for (i=0; i<m; i++) {
3099     ourlens[i] -= offlens[i];
3100   }
3101   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3102 
3103   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3104 
3105   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3106 
3107   for (i=0; i<m; i++) {
3108     ourlens[i] += offlens[i];
3109   }
3110 
3111   if (!rank) {
3112     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3113 
3114     /* read in my part of the matrix numerical values  */
3115     nz   = procsnz[0];
3116     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3117 
3118     /* insert into matrix */
3119     jj      = rstart;
3120     smycols = mycols;
3121     svals   = vals;
3122     for (i=0; i<m; i++) {
3123       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3124       smycols += ourlens[i];
3125       svals   += ourlens[i];
3126       jj++;
3127     }
3128 
3129     /* read in other processors and ship out */
3130     for (i=1; i<size; i++) {
3131       nz   = procsnz[i];
3132       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3133       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3134     }
3135     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3136   } else {
3137     /* receive numeric values */
3138     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3139 
3140     /* receive message of values */
3141     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3142 
3143     /* insert into matrix */
3144     jj      = rstart;
3145     smycols = mycols;
3146     svals   = vals;
3147     for (i=0; i<m; i++) {
3148       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3149       smycols += ourlens[i];
3150       svals   += ourlens[i];
3151       jj++;
3152     }
3153   }
3154   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3155   ierr = PetscFree(vals);CHKERRQ(ierr);
3156   ierr = PetscFree(mycols);CHKERRQ(ierr);
3157   ierr = PetscFree(rowners);CHKERRQ(ierr);
3158   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   PetscFunctionReturn(0);
3161 }
3162 
3163 /* Not scalable because of ISAllGather() unless getting all columns. */
3164 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3165 {
3166   PetscErrorCode ierr;
3167   IS             iscol_local;
3168   PetscBool      isstride;
3169   PetscMPIInt    lisstride=0,gisstride;
3170 
3171   PetscFunctionBegin;
3172   /* check if we are grabbing all the columns */
3173   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3174 
3175   if (isstride) {
3176     PetscInt  start,len,mstart,mlen;
3177     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3178     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3179     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3180     if (mstart == start && mlen-mstart == len) lisstride = 1;
3181   }
3182 
3183   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3184   if (gisstride) {
3185     PetscInt N;
3186     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3187     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3188     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3189     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3190   } else {
3191     PetscInt cbs;
3192     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3193     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3194     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3195   }
3196 
3197   *isseq = iscol_local;
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 /*
3202  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and the global size of iscol_local
3203  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3204 
3205  Input Parameters:
3206    mat - matrix
3207    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3208            i.e., mat->rstart <= isrow[i] < mat->rend
3209    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3210            i.e., mat->cstart <= iscol[i] < mat->cend
3211  Output Parameters:
3212    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3213    iscol_o - sequential column index set for retrieving mat->B
3214    garray - column map; garray[i] indicates the global location of iscol_o[i] in iscol
3215  */
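/*
 Example (hypothetical layout): mat has global columns 0..9 and this process owns columns 4..7;
 the concatenation of iscol over all processes is {0,3,5,8}, of which {5} is local. Then
 iscol_d = {1} (the local index of column 5), iscol_o selects the columns of mat->B that hold
 global columns 0, 3 and 8, and garray = {0,1,3}, their positions within iscol.
*/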
3216 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3217 {
3218   PetscErrorCode ierr;
3219   Vec            x,cmap;
3220   const PetscInt *is_idx;
3221   PetscScalar    *xarray,*cmaparray;
3222   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3223   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3224   Mat            B=a->B;
3225   Vec            lvec=a->lvec,lcmap;
3226   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3227   MPI_Comm       comm;
3228   VecScatter     Mvctx=a->Mvctx;
3229 
3230   PetscFunctionBegin;
3231   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3232   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3233 
3234   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3235   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3236   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3237   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3238   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3239 
3240   /* Get start indices */
3241   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3242   isstart -= ncols;
3243   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3244 
3245   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3246   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3247   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3248   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3249   for (i=0; i<ncols; i++) {
3250     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3251     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3252     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3253   }
3254   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3255   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3256   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3257 
3258   /* Get iscol_d */
3259   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3260   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3261   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3262 
3263   /* Get isrow_d */
3264   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3265   rstart = mat->rmap->rstart;
3266   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3267   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3268   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3269   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3270 
3271   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3272   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3273   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3274 
3275   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3276   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3277   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3278 
3279   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3280 
3281   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3282   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3283 
3284   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3285   /* off-process column indices */
3286   count = 0;
3287   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3288   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3289 
3290   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3291   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3292   for (i=0; i<Bn; i++) {
3293     if (PetscRealPart(xarray[i]) > -1.0) {
3294       idx[count]     = i;                   /* local column index in off-diagonal part B */
3295       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3296       count++;
3297     }
3298   }
3299   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3300   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3301 
3302   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3303   /* cannot ensure iscol_o has the same block size as iscol! */
3304 
3305   ierr = PetscFree(idx);CHKERRQ(ierr);
3306   *garray = cmap1;
3307 
3308   ierr = VecDestroy(&x);CHKERRQ(ierr);
3309   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3310   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3311   PetscFunctionReturn(0);
3312 }
3313 
3314 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3315 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3316 {
3317   PetscErrorCode ierr;
3318   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3319   Mat            M = NULL;
3320   MPI_Comm       comm;
3321   IS             iscol_d,isrow_d,iscol_o;
3322   Mat            Asub = NULL,Bsub = NULL;
3323   PetscInt       n;
3324 
3325   PetscFunctionBegin;
3326   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3327 
3328   if (call == MAT_REUSE_MATRIX) {
3329     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3330     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3331     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3332 
3333     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3334     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3335 
3336     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3337     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3338 
3339     /* Update diagonal and off-diagonal portions of submat */
3340     asub = (Mat_MPIAIJ*)(*submat)->data;
3341     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3342     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3343     if (n) {
3344       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3345     }
3346     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3347     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348 
3349   } else { /* call == MAT_INITIAL_MATRIX */
3350     const PetscInt *garray;
3351     PetscInt        BsubN;
3352 
3353     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3354     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3355 
3356     /* Create local submatrices Asub and Bsub */
3357     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3358     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3359 
3360     /* Create submatrix M */
3361     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3362 
3363     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3364     asub = (Mat_MPIAIJ*)M->data;
3365 
3366     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3367     n = asub->B->cmap->N;
3368     if (BsubN > n) {
3369       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3370       const PetscInt *idx;
3371       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3372       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3373 
3374       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3375       j = 0;
3376       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3377       for (i=0; i<n; i++) {
3378         if (j >= BsubN) break;
3379         while (subgarray[i] > garray[j]) j++;
3380 
3381         if (subgarray[i] == garray[j]) {
3382           idx_new[i] = idx[j++];
3383         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3384       }
3385       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3386 
3387       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3388       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3389 
3390     } else if (BsubN < n) {
3391       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3392     }
3393 
3394     ierr = PetscFree(garray);CHKERRQ(ierr);
3395     *submat = M;
3396 
3397     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3398     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3399     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3400 
3401     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3402     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3403 
3404     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3405     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3406   }
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3411 {
3412   PetscErrorCode ierr;
3413   IS             iscol_local=NULL,isrow_d;
3414   PetscInt       csize;
3415   PetscInt       n,i,j,start,end;
3416   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3417   MPI_Comm       comm;
3418 
3419   PetscFunctionBegin;
3420   /* If isrow has the same processor distribution as mat,
3421      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
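  /* Three code paths below:
       (1) isrow and iscol both match mat's layout  -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
       (2) only isrow matches mat's layout          -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
       (3) otherwise gather iscol onto each process -> MatCreateSubMatrix_MPIAIJ_nonscalable() */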
3422   if (call == MAT_REUSE_MATRIX) {
3423     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3424     if (isrow_d) {
3425       sameRowDist  = PETSC_TRUE;
3426       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3427     } else {
3428       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3429       if (iscol_local) {
3430         sameRowDist  = PETSC_TRUE;
3431         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3432       }
3433     }
3434   } else {
3435     /* Check if isrow has same processor distribution as mat */
3436     sameDist[0] = PETSC_FALSE;
3437     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3438     if (!n) {
3439       sameDist[0] = PETSC_TRUE;
3440     } else {
3441       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3442       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3443       if (i >= start && j < end) {
3444         sameDist[0] = PETSC_TRUE;
3445       }
3446     }
3447 
3448     /* Check if iscol has same processor distribution as mat */
3449     sameDist[1] = PETSC_FALSE;
3450     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3451     if (!n) {
3452       sameDist[1] = PETSC_TRUE;
3453     } else {
3454       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3455       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3456       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3457     }
3458 
3459     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3460     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3461     sameRowDist = tsameDist[0];
3462   }
3463 
3464   if (sameRowDist) {
3465     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3466       /* isrow and iscol have same processor distribution as mat */
3467       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3468       PetscFunctionReturn(0);
3469     } else { /* sameRowDist */
3470       /* isrow has same processor distribution as mat */
3471       if (call == MAT_INITIAL_MATRIX) {
3472         PetscBool sorted;
3473         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3474         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3475         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3476         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3477 
3478         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3479         if (sorted) {
3480           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3481           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3482           PetscFunctionReturn(0);
3483         }
3484       } else { /* call == MAT_REUSE_MATRIX */
3485         IS    iscol_sub;
3486         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3487         if (iscol_sub) {
3488           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3489           PetscFunctionReturn(0);
3490         }
3491       }
3492     }
3493   }
3494 
3495   /* General case: iscol -> iscol_local which has global size of iscol */
3496   if (call == MAT_REUSE_MATRIX) {
3497     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3498     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3499   } else {
3500     if (!iscol_local) {
3501       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3502     }
3503   }
3504 
3505   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3506   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3507 
3508   if (call == MAT_INITIAL_MATRIX) {
3509     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3510     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3511   }
3512   PetscFunctionReturn(0);
3513 }
3514 
3515 /*@C
3516      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix from SeqAIJ matrices that contain the "diagonal"
3517          and "off-diagonal" parts of the matrix in CSR format.
3518 
3519    Collective
3520 
3521    Input Parameters:
3522 +  comm - MPI communicator
3523 .  A - "diagonal" portion of matrix
3524 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3525 -  garray - global index of B columns
3526 
3527    Output Parameter:
3528 .   mat - the matrix, with input A as its local diagonal matrix
3529    Level: advanced
3530 
3531    Notes:
3532        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3533        A becomes part of the output mat and B is destroyed by this routine, so the user may not use A or B afterwards.
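
       A minimal usage sketch (sizes and garray values are illustrative; error checking omitted):
.vb
          Mat      A,B,C;
          PetscInt garray[2] = {0,7};  /* global column ids of B's two columns */
          /* each process builds sequential A (m x n) and B (m x 2), e.g. with MatCreateSeqAIJ() */
          MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);  /* A and B now belong to C */
.ve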
3534 
3535 .seealso: MatCreateMPIAIJWithSplitArrays()
3536 @*/
3537 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3538 {
3539   PetscErrorCode ierr;
3540   Mat_MPIAIJ     *maij;
3541   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3542   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3543   PetscScalar    *oa=b->a;
3544   Mat            Bnew;
3545   PetscInt       m,n,N;
3546 
3547   PetscFunctionBegin;
3548   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3549   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3550   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3551   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3552   /* the check below is intentionally disabled; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3553   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3554 
3555   /* Get global columns of mat */
3556   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3557 
3558   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3559   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3560   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3561   maij = (Mat_MPIAIJ*)(*mat)->data;
3562 
3563   (*mat)->preallocated = PETSC_TRUE;
3564 
3565   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3566   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3567 
3568   /* Set A as diagonal portion of *mat */
3569   maij->A = A;
3570 
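  /* overwrite B's local column indices with global ones taken from garray; the assembly at the
     end of this routine compacts them again and rebuilds the column map of the new matrix */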
3571   nz = oi[m];
3572   for (i=0; i<nz; i++) {
3573     col   = oj[i];
3574     oj[i] = garray[col];
3575   }
3576 
3577   /* Set Bnew as the off-diagonal portion of *mat */
3578   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3579   bnew        = (Mat_SeqAIJ*)Bnew->data;
3580   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3581   maij->B     = Bnew;
3582 
3583   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3584 
3585   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3586   b->free_a       = PETSC_FALSE;
3587   b->free_ij      = PETSC_FALSE;
3588   ierr = MatDestroy(&B);CHKERRQ(ierr);
3589 
3590   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3591   bnew->free_a       = PETSC_TRUE;
3592   bnew->free_ij      = PETSC_TRUE;
3593 
3594   /* condense columns of maij->B */
3595   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3596   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3597   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3598   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3599   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3600   PetscFunctionReturn(0);
3601 }
3602 
3603 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3604 
3605 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3606 {
3607   PetscErrorCode ierr;
3608   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3609   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3610   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3611   Mat            M,Msub,B=a->B;
3612   MatScalar      *aa;
3613   Mat_SeqAIJ     *aij;
3614   PetscInt       *garray = a->garray,*colsub,Ncols;
3615   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3616   IS             iscol_sub,iscmap;
3617   const PetscInt *is_idx,*cmap;
3618   PetscBool      allcolumns=PETSC_FALSE;
3619   MPI_Comm       comm;
3620 
3621   PetscFunctionBegin;
3622   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3623 
3624   if (call == MAT_REUSE_MATRIX) {
3625     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3626     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3627     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3628 
3629     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3630     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3631 
3632     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3633     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3634 
3635     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3636 
3637   } else { /* call == MAT_INITIAL_MATRIX */
3638     PetscBool flg;
3639 
3640     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3641     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3642 
3643     /* (1) iscol -> nonscalable iscol_local */
3644     /* Check for special case: each processor gets entire matrix columns */
3645     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3646     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3647     if (allcolumns) {
3648       iscol_sub = iscol_local;
3649       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3650       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3651 
3652     } else {
3653       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3654       PetscInt *idx,*cmap1,k;
3655       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3656       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3657       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3658       count = 0;
3659       k     = 0;
3660       for (i=0; i<Ncols; i++) {
3661         j = is_idx[i];
3662         if (j >= cstart && j < cend) {
3663           /* diagonal part of mat */
3664           idx[count]     = j;
3665           cmap1[count++] = i; /* column index in submat */
3666         } else if (Bn) {
3667           /* off-diagonal part of mat */
3668           if (j == garray[k]) {
3669             idx[count]     = j;
3670             cmap1[count++] = i;  /* column index in submat */
3671           } else if (j > garray[k]) {
3672             while (j > garray[k] && k < Bn-1) k++;
3673             if (j == garray[k]) {
3674               idx[count]     = j;
3675               cmap1[count++] = i; /* column index in submat */
3676             }
3677           }
3678         }
3679       }
3680       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3681 
3682       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3683       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3684       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3685 
3686       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3687     }
3688 
3689     /* (3) Create sequential Msub */
3690     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3691   }
3692 
3693   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3694   aij  = (Mat_SeqAIJ*)(Msub)->data;
3695   ii   = aij->i;
3696   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3697 
3698   /*
3699       m - number of local rows
3700       Ncols - number of columns (same on all processors)
3701       rstart - first row in new global matrix generated
3702   */
3703   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3704 
3705   if (call == MAT_INITIAL_MATRIX) {
3706     /* (4) Create parallel newmat */
3707     PetscMPIInt    rank,size;
3708     PetscInt       csize;
3709 
3710     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3711     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3712 
3713     /*
3714         Determine the number of non-zeros in the diagonal and off-diagonal
3715         portions of the matrix in order to do correct preallocation
3716     */
3717 
3718     /* first get start and end of "diagonal" columns */
3719     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3720     if (csize == PETSC_DECIDE) {
3721       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3722       if (mglobal == Ncols) { /* square matrix */
3723         nlocal = m;
3724       } else {
3725         nlocal = Ncols/size + ((Ncols % size) > rank);
3726       }
3727     } else {
3728       nlocal = csize;
3729     }
3730     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3731     rstart = rend - nlocal;
3732     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3733 
3734     /* next, compute all the lengths */
3735     jj    = aij->j;
3736     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3737     olens = dlens + m;
3738     for (i=0; i<m; i++) {
3739       jend = ii[i+1] - ii[i];
3740       olen = 0;
3741       dlen = 0;
3742       for (j=0; j<jend; j++) {
3743         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3744         else dlen++;
3745         jj++;
3746       }
3747       olens[i] = olen;
3748       dlens[i] = dlen;
3749     }
3750 
3751     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3752     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3753 
3754     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3755     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3756     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3757     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3758     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3759     ierr = PetscFree(dlens);CHKERRQ(ierr);
3760 
3761   } else { /* call == MAT_REUSE_MATRIX */
3762     M    = *newmat;
3763     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3764     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3765     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3766     /*
3767          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3768        rather than the slower MatSetValues().
3769     */
3770     M->was_assembled = PETSC_TRUE;
3771     M->assembled     = PETSC_FALSE;
3772   }
3773 
3774   /* (5) Set values of Msub to *newmat */
3775   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3776   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3777 
3778   jj   = aij->j;
3779   aa   = aij->a;
3780   for (i=0; i<m; i++) {
3781     row = rstart + i;
3782     nz  = ii[i+1] - ii[i];
3783     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3784     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3785     jj += nz; aa += nz;
3786   }
3787   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3788 
3789   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3791 
3792   ierr = PetscFree(colsub);CHKERRQ(ierr);
3793 
3794   /* save Msub, iscol_sub and iscmap used in processor for next request */
3795   if (call ==  MAT_INITIAL_MATRIX) {
3796     *newmat = M;
3797     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3798     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3799 
3800     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3801     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3802 
3803     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3804     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3805 
3806     if (iscol_local) {
3807       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3808       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3809     }
3810   }
3811   PetscFunctionReturn(0);
3812 }
3813 
3814 /*
3815     Not great since it makes two copies of the submatrix: first a SeqAIJ
3816   built locally, and then the end result formed by concatenating the local matrices.
3817   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3818 
3819   Note: This requires a sequential iscol with all indices.
3820 */
3821 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3822 {
3823   PetscErrorCode ierr;
3824   PetscMPIInt    rank,size;
3825   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3826   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3827   Mat            M,Mreuse;
3828   MatScalar      *aa,*vwork;
3829   MPI_Comm       comm;
3830   Mat_SeqAIJ     *aij;
3831   PetscBool      colflag,allcolumns=PETSC_FALSE;
3832 
3833   PetscFunctionBegin;
3834   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3835   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3836   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3837 
3838   /* Check for special case: each processor gets entire matrix columns */
3839   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3840   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3841   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3842 
3843   if (call ==  MAT_REUSE_MATRIX) {
3844     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3845     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3846     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3847   } else {
3848     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3849   }
3850 
3851   /*
3852       m - number of local rows
3853       n - number of columns (same on all processors)
3854       rstart - first row in new global matrix generated
3855   */
3856   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3857   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3858   if (call == MAT_INITIAL_MATRIX) {
3859     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3860     ii  = aij->i;
3861     jj  = aij->j;
3862 
3863     /*
3864         Determine the number of non-zeros in the diagonal and off-diagonal
3865         portions of the matrix in order to do correct preallocation
3866     */
3867 
3868     /* first get start and end of "diagonal" columns */
3869     if (csize == PETSC_DECIDE) {
3870       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3871       if (mglobal == n) { /* square matrix */
3872         nlocal = m;
3873       } else {
3874         nlocal = n/size + ((n % size) > rank);
3875       }
3876     } else {
3877       nlocal = csize;
3878     }
3879     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3880     rstart = rend - nlocal;
3881     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3882 
3883     /* next, compute all the lengths */
3884     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3885     olens = dlens + m;
3886     for (i=0; i<m; i++) {
3887       jend = ii[i+1] - ii[i];
3888       olen = 0;
3889       dlen = 0;
3890       for (j=0; j<jend; j++) {
3891         if (*jj < rstart || *jj >= rend) olen++;
3892         else dlen++;
3893         jj++;
3894       }
3895       olens[i] = olen;
3896       dlens[i] = dlen;
3897     }
3898     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3899     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3900     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3901     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3902     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3903     ierr = PetscFree(dlens);CHKERRQ(ierr);
3904   } else {
3905     PetscInt ml,nl;
3906 
3907     M    = *newmat;
3908     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3909     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3910     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3911     /*
3912          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3913        rather than the slower MatSetValues().
3914     */
3915     M->was_assembled = PETSC_TRUE;
3916     M->assembled     = PETSC_FALSE;
3917   }
3918   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3919   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3920   ii   = aij->i;
3921   jj   = aij->j;
3922   aa   = aij->a;
3923   for (i=0; i<m; i++) {
3924     row   = rstart + i;
3925     nz    = ii[i+1] - ii[i];
3926     cwork = jj;     jj += nz;
3927     vwork = aa;     aa += nz;
3928     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3929   }
3930 
3931   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   *newmat = M;
3934 
3935   /* save submatrix used in processor for next request */
3936   if (call ==  MAT_INITIAL_MATRIX) {
3937     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3938     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3939   }
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3944 {
3945   PetscInt       m,cstart, cend,j,nnz,i,d;
3946   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3947   const PetscInt *JJ;
3948   PetscErrorCode ierr;
3949   PetscBool      nooffprocentries;
3950 
3951   PetscFunctionBegin;
3952   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3953 
3954   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3955   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3956   m      = B->rmap->n;
3957   cstart = B->cmap->rstart;
3958   cend   = B->cmap->rend;
3959   rstart = B->rmap->rstart;
3960 
3961   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3962 
3963 #if defined(PETSC_USE_DEBUG)
3964   for (i=0; i<m; i++) {
3965     nnz = Ii[i+1]- Ii[i];
3966     JJ  = J + Ii[i];
3967     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3968     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3969     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3970   }
3971 #endif
3972 
3973   for (i=0; i<m; i++) {
3974     nnz     = Ii[i+1]- Ii[i];
3975     JJ      = J + Ii[i];
3976     nnz_max = PetscMax(nnz_max,nnz);
3977     d       = 0;
3978     for (j=0; j<nnz; j++) {
3979       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3980     }
3981     d_nnz[i] = d;
3982     o_nnz[i] = nnz - d;
3983   }
3984   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3985   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3986 
3987   for (i=0; i<m; i++) {
3988     ii   = i + rstart;
3989     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3990   }
3991   nooffprocentries    = B->nooffprocentries;
3992   B->nooffprocentries = PETSC_TRUE;
3993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3995   B->nooffprocentries = nooffprocentries;
3996 
3997   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3998   PetscFunctionReturn(0);
3999 }
4000 
4001 /*@
4002    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4003    (the default parallel PETSc format).
4004 
4005    Collective
4006 
4007    Input Parameters:
4008 +  B - the matrix
4009 .  i - the indices into j for the start of each local row (starts with zero)
4010 .  j - the column indices for each local row (starts with zero)
4011 -  v - optional values in the matrix
4012 
4013    Level: developer
4014 
4015    Notes:
4016        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4017      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4018      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4019 
4020        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4021 
4022        The format used for the sparse matrix input is equivalent to a
4023     row-major ordering, i.e. for the following matrix, the input data expected is
4024     as shown:
4025 
4026 $        1 0 0
4027 $        2 0 3     P0
4028 $       -------
4029 $        4 5 6     P1
4030 $
4031 $     Process0 [P0]: rows_owned=[0,1]
4032 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4033 $        j =  {0,0,2}  [size = 3]
4034 $        v =  {1,2,3}  [size = 3]
4035 $
4036 $     Process1 [P1]: rows_owned=[2]
4037 $        i =  {0,3}    [size = nrow+1  = 1+1]
4038 $        j =  {0,1,2}  [size = 3]
4039 $        v =  {4,5,6}  [size = 3]
4040 
4041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4042           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4043 @*/
4044 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4045 {
4046   PetscErrorCode ierr;
4047 
4048   PetscFunctionBegin;
4049   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 /*@C
4054    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4055    (the default parallel PETSc format).  For good matrix assembly performance
4056    the user should preallocate the matrix storage by setting the parameters
4057    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4058    performance can be increased by more than a factor of 50.
4059 
4060    Collective
4061 
4062    Input Parameters:
4063 +  B - the matrix
4064 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4065            (same value is used for all local rows)
4066 .  d_nnz - array containing the number of nonzeros in the various rows of the
4067            DIAGONAL portion of the local submatrix (possibly different for each row)
4068            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4069            The size of this array is equal to the number of local rows, i.e 'm'.
4070            For matrices that will be factored, you must leave room for (and set)
4071            the diagonal entry even if it is zero.
4072 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4073            submatrix (same value is used for all local rows).
4074 -  o_nnz - array containing the number of nonzeros in the various rows of the
4075            OFF-DIAGONAL portion of the local submatrix (possibly different for
4076            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4077            structure. The size of this array is equal to the number
4078            of local rows, i.e 'm'.
4079 
4080    If the *_nnz parameter is given then the *_nz parameter is ignored
4081 
4082    The AIJ format (also called the Yale sparse matrix format or
4083    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4084    storage.  The stored row and column indices begin with zero.
4085    See Users-Manual: ch_mat for details.
4086 
4087    The parallel matrix is partitioned such that the first m0 rows belong to
4088    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4089    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4090 
4091    The DIAGONAL portion of the local submatrix of a processor can be defined
4092    as the submatrix obtained by extracting the part corresponding to
4093    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4094    first row that belongs to the processor, r2 is the last row belonging to
4095    this processor, and c1-c2 is the range of indices of the local part of a
4096    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4097    common case of a square matrix, the row and column ranges are the same and
4098    the DIAGONAL part is also square. The remaining portion of the local
4099    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4100 
4101    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4102 
4103    You can call MatGetInfo() to get information on how effective the preallocation was;
4104    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4105    You can also run with the option -info and look for messages with the string
4106    malloc in them to see if additional memory allocation was needed.
4107 
4108    Example usage:
4109 
4110    Consider the following 8x8 matrix with 34 non-zero values, that is
4111    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4112    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4113    as follows:
4114 
4115 .vb
4116             1  2  0  |  0  3  0  |  0  4
4117     Proc0   0  5  6  |  7  0  0  |  8  0
4118             9  0 10  | 11  0  0  | 12  0
4119     -------------------------------------
4120            13  0 14  | 15 16 17  |  0  0
4121     Proc1   0 18  0  | 19 20 21  |  0  0
4122             0  0  0  | 22 23  0  | 24  0
4123     -------------------------------------
4124     Proc2  25 26 27  |  0  0 28  | 29  0
4125            30  0  0  | 31 32 33  |  0 34
4126 .ve
4127 
4128    This can be represented as a collection of submatrices as:
4129 
4130 .vb
4131       A B C
4132       D E F
4133       G H I
4134 .ve
4135 
4136    Where the submatrices A,B,C are owned by proc0, D,E,F are
4137    owned by proc1, G,H,I are owned by proc2.
4138 
4139    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4140    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4141    The 'M','N' parameters are 8,8, and have the same values on all procs.
4142 
4143    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4144    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4145    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4146    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4147    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4148    matrix, and [DF] as another SeqAIJ matrix.
4149 
4150    When d_nz, o_nz parameters are specified, d_nz storage elements are
4151    allocated for every row of the local diagonal submatrix, and o_nz
4152    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4153    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4154    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4155    In this case, the values of d_nz,o_nz are:
4156 .vb
4157      proc0 : dnz = 2, o_nz = 2
4158      proc1 : dnz = 3, o_nz = 2
4159      proc2 : dnz = 1, o_nz = 4
4160 .ve
4161    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4162    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4163    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4164    34 values.
4165 
4166    When d_nnz, o_nnz parameters are specified, the storage is specified
4167    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4168    In the above case the values for d_nnz,o_nnz are:
4169 .vb
4170      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4171      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4172      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4173 .ve
4174    Here the space allocated is sum of all the above values i.e 34, and
4175    hence pre-allocation is perfect.
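
   As a minimal sketch (hedged: it reuses the proc0 per-row counts from the example above, and comm
   and error checking are assumed from the surrounding program), the preallocation calls on proc0
   would look like:

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      MatCreate(comm,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   Since the arrays are given, the scalar d_nz and o_nz arguments (here 0) are ignored.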
4176 
4177    Level: intermediate
4178 
4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4180           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4181 @*/
4182 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4183 {
4184   PetscErrorCode ierr;
4185 
4186   PetscFunctionBegin;
4187   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4188   PetscValidType(B,1);
4189   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4190   PetscFunctionReturn(0);
4191 }
4192 
4193 /*@
4194      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4195          rows in standard CSR format.
4196 
4197    Collective
4198 
4199    Input Parameters:
4200 +  comm - MPI communicator
4201 .  m - number of local rows (Cannot be PETSC_DECIDE)
4202 .  n - This value should be the same as the local size used in creating the
4203        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4204        it calculated if N is given). For square matrices n is almost always m.
4205 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4206 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4207 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4208 .   j - column indices
4209 -   a - matrix values
4210 
4211    Output Parameter:
4212 .   mat - the matrix
4213 
4214    Level: intermediate
4215 
4216    Notes:
4217        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4218      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4219      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4220 
4221        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4222 
4223        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4224 
4225        The format used for the sparse matrix input is equivalent to a
4226     row-major ordering, i.e. for the following matrix, the input data expected is
4227     as shown:
4228 
4229 $        1 0 0
4230 $        2 0 3     P0
4231 $       -------
4232 $        4 5 6     P1
4233 $
4234 $     Process0 [P0]: rows_owned=[0,1]
4235 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4236 $        j =  {0,0,2}  [size = 3]
4237 $        v =  {1,2,3}  [size = 3]
4238 $
4239 $     Process1 [P1]: rows_owned=[2]
4240 $        i =  {0,3}    [size = nrow+1  = 1+1]
4241 $        j =  {0,1,2}  [size = 3]
4242 $        v =  {4,5,6}  [size = 3]
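
       A minimal calling sketch for P0 above (hedged: comm and error checking are assumed from the
    surrounding program):

.vb
      Mat         mat;
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};

      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);
.ve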
4243 
4244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4245           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4246 @*/
4247 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4248 {
4249   PetscErrorCode ierr;
4250 
4251   PetscFunctionBegin;
4252   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4253   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4254   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4255   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4256   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4257   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4258   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4259   PetscFunctionReturn(0);
4260 }
4261 
4262 /*@
4263      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4264          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical.
4265 
4266    Collective
4267 
4268    Input Parameters:
4269 +  mat - the matrix
4270 .  m - number of local rows (Cannot be PETSC_DECIDE)
4271 .  n - This value should be the same as the local size used in creating the
4272        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4273        it calculated if N is given). For square matrices n is almost always m.
4274 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4275 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4276 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4277 .  J - column indices
4278 -  v - matrix values
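
   Notes:
       A minimal sketch of the intended pattern (hedged: Ii and J must be identical to those passed at
     creation; vnew is a hypothetical array holding the new numerical values):

.vb
      MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&mat);
      MatUpdateMPIAIJWithArrays(mat,m,n,M,N,Ii,J,vnew);
.ve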
4279 
4280    Level: intermediate
4281 
4282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4283           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4284 @*/
4285 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4286 {
4287   PetscErrorCode ierr;
4288   PetscInt       cstart,nnz,i,j;
4289   PetscInt       *ld;
4290   PetscBool      nooffprocentries;
4291   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4292   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4293   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4294   const PetscInt *Adi = Ad->i;
4295   PetscInt       ldi,Iii,md;
4296 
4297   PetscFunctionBegin;
4298   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4299   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4300   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4301   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4302 
4303   cstart = mat->cmap->rstart;
4304   if (!Aij->ld) {
4305     /* count number of entries below block diagonal */
4306     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4307     Aij->ld = ld;
4308     for (i=0; i<m; i++) {
4309       nnz  = Ii[i+1]- Ii[i];
4310       j     = 0;
4311       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before indexing J[j] to avoid reading past the row */
4312       J    += nnz;
4313       ld[i] = j;
4314     }
4315   } else {
4316     ld = Aij->ld;
4317   }
4318 
4319   for (i=0; i<m; i++) {
4320     nnz  = Ii[i+1]- Ii[i];
4321     Iii  = Ii[i];
4322     ldi  = ld[i];
4323     md   = Adi[i+1]-Adi[i];
4324     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4325     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4326     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4327     ad  += md;
4328     ao  += nnz - md;
4329   }
4330   nooffprocentries      = mat->nooffprocentries;
4331   mat->nooffprocentries = PETSC_TRUE;
4332   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4333   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4334   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4335   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4336   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4337   mat->nooffprocentries = nooffprocentries;
4338   PetscFunctionReturn(0);
4339 }
4340 
4341 /*@C
4342    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4343    (the default parallel PETSc format).  For good matrix assembly performance
4344    the user should preallocate the matrix storage by setting the parameters
4345    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4346    performance can be increased by more than a factor of 50.
4347 
4348    Collective
4349 
4350    Input Parameters:
4351 +  comm - MPI communicator
4352 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4353            This value should be the same as the local size used in creating the
4354            y vector for the matrix-vector product y = Ax.
4355 .  n - This value should be the same as the local size used in creating the
4356        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4357        calculated if N is given) For square matrices n is almost always m.
4358 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4359 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4360 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4361            (same value is used for all local rows)
4362 .  d_nnz - array containing the number of nonzeros in the various rows of the
4363            DIAGONAL portion of the local submatrix (possibly different for each row)
4364            or NULL, if d_nz is used to specify the nonzero structure.
4365            The size of this array is equal to the number of local rows, i.e 'm'.
4366 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4367            submatrix (same value is used for all local rows).
4368 -  o_nnz - array containing the number of nonzeros in the various rows of the
4369            OFF-DIAGONAL portion of the local submatrix (possibly different for
4370            each row) or NULL, if o_nz is used to specify the nonzero
4371            structure. The size of this array is equal to the number
4372            of local rows, i.e 'm'.
4373 
4374    Output Parameter:
4375 .  A - the matrix
4376 
4377    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4378    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4379    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4380 
4381    Notes:
4382    If the *_nnz parameter is given then the *_nz parameter is ignored
4383 
4384    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4385    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4386    storage requirements for this matrix.
4387 
4388    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4389    processor then it must be used on all processors that share the object for
4390    that argument.
4391 
4392    The user MUST specify either the local or global matrix dimensions
4393    (possibly both).
4394 
4395    The parallel matrix is partitioned across processors such that the
4396    first m0 rows belong to process 0, the next m1 rows belong to
4397    process 1, the next m2 rows belong to process 2, etc., where
4398    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4399    values corresponding to an [m x N] submatrix.
4400 
4401    The columns are logically partitioned with the n0 columns belonging
4402    to 0th partition, the next n1 columns belonging to the next
4403    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4404 
4405    The DIAGONAL portion of the local submatrix on any given processor
4406    is the submatrix corresponding to the rows and columns m,n
4407    corresponding to the given processor. i.e diagonal matrix on
4408    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4409    etc. The remaining portion of the local submatrix [m x (N-n)]
4410    constitutes the OFF-DIAGONAL portion. The example below better
4411    illustrates this concept.
4412 
4413    For a square global matrix we define each processor's diagonal portion
4414    to be its local rows and the corresponding columns (a square submatrix);
4415    each processor's off-diagonal portion encompasses the remainder of the
4416    local matrix (a rectangular submatrix).
4417 
4418    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4419 
4420    When calling this routine with a single process communicator, a matrix of
4421    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4422    type of communicator, use the construction mechanism
4423 .vb
4424      MatCreate(...,&A);
4425      MatSetType(A,MATMPIAIJ);
4426      MatSetSizes(A, m,n,M,N);
4427      MatMPIAIJSetPreallocation(A,...);
4428 .ve
4431 
4432    By default, this format uses inodes (identical nodes) when possible.
4433    We search for consecutive rows with the same nonzero structure, thereby
4434    reusing matrix information to achieve increased efficiency.
4435 
4436    Options Database Keys:
4437 +  -mat_no_inode  - Do not use inodes
4438 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4439 
4442    Example usage:
4443 
4444    Consider the following 8x8 matrix with 34 non-zero values, that is
4445    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4446    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4447    as follows
4448 
4449 .vb
4450             1  2  0  |  0  3  0  |  0  4
4451     Proc0   0  5  6  |  7  0  0  |  8  0
4452             9  0 10  | 11  0  0  | 12  0
4453     -------------------------------------
4454            13  0 14  | 15 16 17  |  0  0
4455     Proc1   0 18  0  | 19 20 21  |  0  0
4456             0  0  0  | 22 23  0  | 24  0
4457     -------------------------------------
4458     Proc2  25 26 27  |  0  0 28  | 29  0
4459            30  0  0  | 31 32 33  |  0 34
4460 .ve
4461 
4462    This can be represented as a collection of submatrices as
4463 
4464 .vb
4465       A B C
4466       D E F
4467       G H I
4468 .ve
4469 
4470    Where the submatrices A,B,C are owned by proc0, D,E,F are
4471    owned by proc1, G,H,I are owned by proc2.
4472 
4473    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4474    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4475    The 'M','N' parameters are 8,8, and have the same values on all procs.
4476 
4477    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4478    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4479    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4480    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4481    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4482    matrix, and [DF] as another SeqAIJ matrix.
4483 
4484    When d_nz, o_nz parameters are specified, d_nz storage elements are
4485    allocated for every row of the local diagonal submatrix, and o_nz
4486    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4487    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4488    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4489    In this case, the values of d_nz,o_nz are
4490 .vb
4491      proc0 : dnz = 2, o_nz = 2
4492      proc1 : dnz = 3, o_nz = 2
4493      proc2 : dnz = 1, o_nz = 4
4494 .ve
4495    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4496    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4497    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4498    34 values.
4499 
4500    When d_nnz, o_nnz parameters are specified, the storage is specified
4501    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4502    In the above case the values for d_nnz,o_nnz are
4503 .vb
4504      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4505      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4506      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4507 .ve
4508    Here the space allocated is sum of all the above values i.e 34, and
4509    hence pre-allocation is perfect.
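
   As a minimal sketch (hedged: it reuses the proc0 per-row counts from the example above, with comm
   assumed from the surrounding program):

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   Values are then inserted with MatSetValues() and the matrix assembled with
   MatAssemblyBegin()/MatAssemblyEnd().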
4510 
4511    Level: intermediate
4512 
4513 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4514           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4515 @*/
4516 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4517 {
4518   PetscErrorCode ierr;
4519   PetscMPIInt    size;
4520 
4521   PetscFunctionBegin;
4522   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4523   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4525   if (size > 1) {
4526     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4527     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4528   } else {
4529     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4530     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4531   }
4532   PetscFunctionReturn(0);
4533 }
4534 
4535 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4536 {
4537   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4538   PetscBool      flg;
4539   PetscErrorCode ierr;
4540 
4541   PetscFunctionBegin;
4542   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4543   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4544   if (Ad)     *Ad     = a->A;
4545   if (Ao)     *Ao     = a->B;
4546   if (colmap) *colmap = a->garray;
4547   PetscFunctionReturn(0);
4548 }
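
/*
   A minimal usage sketch for MatMPIAIJGetSeqAIJ() (hedged: assumes A is an assembled MATMPIAIJ matrix).
   Ad is the local diagonal block, Ao is the off-diagonal block stored with compressed columns, and
   garray[k] gives the global column index of local column k of Ao.

.vb
      Mat            Ad,Ao;
      const PetscInt *garray;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
.ve
*/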
4549 
4550 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4551 {
4552   PetscErrorCode ierr;
4553   PetscInt       m,N,i,rstart,nnz,Ii;
4554   PetscInt       *indx;
4555   PetscScalar    *values;
4556 
4557   PetscFunctionBegin;
4558   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4559   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4560     PetscInt       *dnz,*onz,sum,bs,cbs;
4561 
4562     if (n == PETSC_DECIDE) {
4563       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4564     }
4565     /* Check sum(n) = N */
4566     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4567     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4568 
4569     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4570     rstart -= m;
4571 
4572     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4573     for (i=0; i<m; i++) {
4574       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4575       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4576       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4577     }
4578 
4579     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4580     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4581     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4582     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4583     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4584     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4585     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4586     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4587   }
4588 
4589   /* numeric phase */
4590   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4591   for (i=0; i<m; i++) {
4592     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4593     Ii   = i + rstart;
4594     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4595     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4596   }
4597   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4603 {
4604   PetscErrorCode    ierr;
4605   PetscMPIInt       rank;
4606   PetscInt          m,N,i,rstart,nnz;
4607   size_t            len;
4608   const PetscInt    *indx;
4609   PetscViewer       out;
4610   char              *name;
4611   Mat               B;
4612   const PetscScalar *values;
4613 
4614   PetscFunctionBegin;
4615   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4616   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4617   /* Should this be the type of the diagonal block of A? */
4618   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4619   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4620   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4621   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4622   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4623   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4624   for (i=0; i<m; i++) {
4625     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4626     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4627     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4628   }
4629   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4630   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4631 
4632   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4633   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4634   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4635   sprintf(name,"%s.%d",outfile,rank);
4636   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4637   ierr = PetscFree(name);CHKERRQ(ierr);
4638   ierr = MatView(B,out);CHKERRQ(ierr);
4639   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4640   ierr = MatDestroy(&B);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4645 {
4646   PetscErrorCode      ierr;
4647   Mat_Merge_SeqsToMPI *merge;
4648   PetscContainer      container;
4649 
4650   PetscFunctionBegin;
4651   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4652   if (container) {
4653     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4654     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4655     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4656     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4657     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4658     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4666     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4667     ierr = PetscFree(merge);CHKERRQ(ierr);
4668     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4669   }
4670   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4671   PetscFunctionReturn(0);
4672 }
4673 
4674 #include <../src/mat/utils/freespace.h>
4675 #include <petscbt.h>
4676 
4677 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4678 {
4679   PetscErrorCode      ierr;
4680   MPI_Comm            comm;
4681   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4682   PetscMPIInt         size,rank,taga,*len_s;
4683   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4684   PetscInt            proc,m;
4685   PetscInt            **buf_ri,**buf_rj;
4686   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4687   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4688   MPI_Request         *s_waits,*r_waits;
4689   MPI_Status          *status;
4690   MatScalar           *aa=a->a;
4691   MatScalar           **abuf_r,*ba_i;
4692   Mat_Merge_SeqsToMPI *merge;
4693   PetscContainer      container;
4694 
4695   PetscFunctionBegin;
4696   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4697   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4698 
4699   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4700   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4701 
4702   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4703   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4704 
4705   bi     = merge->bi;
4706   bj     = merge->bj;
4707   buf_ri = merge->buf_ri;
4708   buf_rj = merge->buf_rj;
4709 
4710   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4711   owners = merge->rowmap->range;
4712   len_s  = merge->len_s;
4713 
4714   /* send and recv matrix values */
4715   /*-----------------------------*/
4716   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4717   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4718 
4719   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4720   for (proc=0,k=0; proc<size; proc++) {
4721     if (!len_s[proc]) continue;
4722     i    = owners[proc];
4723     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4724     k++;
4725   }
4726 
4727   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4728   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4729   ierr = PetscFree(status);CHKERRQ(ierr);
4730 
4731   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4732   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4733 
4734   /* insert mat values of mpimat */
4735   /*----------------------------*/
4736   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4737   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4738 
4739   for (k=0; k<merge->nrecv; k++) {
4740     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4741     nrows       = *(buf_ri_k[k]);
4742     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4743     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4744   }
4745 
4746   /* set values of ba */
4747   m = merge->rowmap->n;
4748   for (i=0; i<m; i++) {
4749     arow = owners[rank] + i;
4750     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4751     bnzi = bi[i+1] - bi[i];
4752     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4753 
4754     /* add local non-zero vals of this proc's seqmat into ba */
4755     anzi   = ai[arow+1] - ai[arow];
4756     aj     = a->j + ai[arow];
4757     aa     = a->a + ai[arow];
4758     nextaj = 0;
4759     for (j=0; nextaj<anzi; j++) {
4760       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4761         ba_i[j] += aa[nextaj++];
4762       }
4763     }
4764 
4765     /* add received vals into ba */
4766     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4767       /* i-th row */
4768       if (i == *nextrow[k]) {
4769         anzi   = *(nextai[k]+1) - *nextai[k];
4770         aj     = buf_rj[k] + *(nextai[k]);
4771         aa     = abuf_r[k] + *(nextai[k]);
4772         nextaj = 0;
4773         for (j=0; nextaj<anzi; j++) {
4774           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4775             ba_i[j] += aa[nextaj++];
4776           }
4777         }
4778         nextrow[k]++; nextai[k]++;
4779       }
4780     }
4781     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4782   }
4783   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4784   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4785 
4786   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4787   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4788   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4789   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4790   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4791   PetscFunctionReturn(0);
4792 }
4793 
4794 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4795 {
4796   PetscErrorCode      ierr;
4797   Mat                 B_mpi;
4798   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4799   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4800   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4801   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4802   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4803   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4804   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4805   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4806   MPI_Status          *status;
4807   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4808   PetscBT             lnkbt;
4809   Mat_Merge_SeqsToMPI *merge;
4810   PetscContainer      container;
4811 
4812   PetscFunctionBegin;
4813   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4814 
4815   /* make sure it is a PETSc comm */
4816   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4817   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4818   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4819 
4820   ierr = PetscNew(&merge);CHKERRQ(ierr);
4821   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4822 
4823   /* determine row ownership */
4824   /*---------------------------------------------------------*/
4825   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4826   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4827   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4828   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4829   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4830   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4831   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4832 
4833   m      = merge->rowmap->n;
4834   owners = merge->rowmap->range;
4835 
4836   /* determine the number of messages to send, their lengths */
4837   /*---------------------------------------------------------*/
4838   len_s = merge->len_s;
4839 
4840   len          = 0; /* length of buf_si[] */
4841   merge->nsend = 0;
4842   for (proc=0; proc<size; proc++) {
4843     len_si[proc] = 0;
4844     if (proc == rank) {
4845       len_s[proc] = 0;
4846     } else {
4847       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4848       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4849     }
4850     if (len_s[proc]) {
4851       merge->nsend++;
4852       nrows = 0;
4853       for (i=owners[proc]; i<owners[proc+1]; i++) {
4854         if (ai[i+1] > ai[i]) nrows++;
4855       }
4856       len_si[proc] = 2*(nrows+1);
4857       len         += len_si[proc];
4858     }
4859   }
4860 
4861   /* determine the number and length of messages to receive for ij-structure */
4862   /*-------------------------------------------------------------------------*/
4863   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4864   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4865 
4866   /* post the Irecv of j-structure */
4867   /*-------------------------------*/
4868   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4869   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4870 
4871   /* post the Isend of j-structure */
4872   /*--------------------------------*/
4873   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4874 
4875   for (proc=0, k=0; proc<size; proc++) {
4876     if (!len_s[proc]) continue;
4877     i    = owners[proc];
4878     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4879     k++;
4880   }
4881 
4882   /* receives and sends of j-structure are complete */
4883   /*------------------------------------------------*/
4884   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4885   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4886 
4887   /* send and recv i-structure */
4888   /*---------------------------*/
4889   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4890   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4891 
4892   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4893   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4894   for (proc=0,k=0; proc<size; proc++) {
4895     if (!len_s[proc]) continue;
4896     /* form outgoing message for i-structure:
4897          buf_si[0]:                 nrows to be sent
4898                [1:nrows]:           row index (global)
4899                [nrows+1:2*nrows+1]: i-structure index
4900     */
4901     /*-------------------------------------------*/
4902     nrows       = len_si[proc]/2 - 1;
4903     buf_si_i    = buf_si + nrows+1;
4904     buf_si[0]   = nrows;
4905     buf_si_i[0] = 0;
4906     nrows       = 0;
4907     for (i=owners[proc]; i<owners[proc+1]; i++) {
4908       anzi = ai[i+1] - ai[i];
4909       if (anzi) {
4910         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4911         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4912         nrows++;
4913       }
4914     }
4915     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4916     k++;
4917     buf_si += len_si[proc];
4918   }
4919 
4920   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4921   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4922 
4923   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4924   for (i=0; i<merge->nrecv; i++) {
4925     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4926   }
4927 
4928   ierr = PetscFree(len_si);CHKERRQ(ierr);
4929   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4930   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4931   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4932   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4933   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4934   ierr = PetscFree(status);CHKERRQ(ierr);
4935 
4936   /* compute a local seq matrix in each processor */
4937   /*----------------------------------------------*/
4938   /* allocate bi array and free space for accumulating nonzero column info */
4939   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4940   bi[0] = 0;
4941 
4942   /* create and initialize a linked list */
4943   nlnk = N+1;
4944   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4945 
4946   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4947   len  = ai[owners[rank+1]] - ai[owners[rank]];
4948   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4949 
4950   current_space = free_space;
4951 
4952   /* determine symbolic info for each local row */
4953   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4954 
4955   for (k=0; k<merge->nrecv; k++) {
4956     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4957     nrows       = *buf_ri_k[k];
4958     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4959     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4960   }
4961 
4962   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4963   len  = 0;
4964   for (i=0; i<m; i++) {
4965     bnzi = 0;
4966     /* add local non-zero cols of this proc's seqmat into lnk */
4967     arow  = owners[rank] + i;
4968     anzi  = ai[arow+1] - ai[arow];
4969     aj    = a->j + ai[arow];
4970     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4971     bnzi += nlnk;
4972     /* add received col data into lnk */
4973     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4974       if (i == *nextrow[k]) { /* i-th row */
4975         anzi  = *(nextai[k]+1) - *nextai[k];
4976         aj    = buf_rj[k] + *nextai[k];
4977         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4978         bnzi += nlnk;
4979         nextrow[k]++; nextai[k]++;
4980       }
4981     }
4982     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4983 
4984     /* if free space is not available, make more free space */
4985     if (current_space->local_remaining<bnzi) {
4986       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4987       nspacedouble++;
4988     }
4989     /* copy data into free space, then initialize lnk */
4990     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4991     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4992 
4993     current_space->array           += bnzi;
4994     current_space->local_used      += bnzi;
4995     current_space->local_remaining -= bnzi;
4996 
4997     bi[i+1] = bi[i] + bnzi;
4998   }
4999 
5000   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5001 
5002   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5003   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5004   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5005 
5006   /* create symbolic parallel matrix B_mpi */
5007   /*---------------------------------------*/
5008   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5009   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5010   if (n==PETSC_DECIDE) {
5011     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5012   } else {
5013     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5014   }
5015   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5016   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5017   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5018   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5019   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5020 
5021   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5022   B_mpi->assembled    = PETSC_FALSE;
5023   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5024   merge->bi           = bi;
5025   merge->bj           = bj;
5026   merge->buf_ri       = buf_ri;
5027   merge->buf_rj       = buf_rj;
5028   merge->coi          = NULL;
5029   merge->coj          = NULL;
5030   merge->owners_co    = NULL;
5031 
5032   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5033 
5034   /* attach the supporting struct to B_mpi for reuse */
5035   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5036   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5037   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5038   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5039   *mpimat = B_mpi;
5040 
5041   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5042   PetscFunctionReturn(0);
5043 }
5044 
5045 /*@C
5046       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5047                  matrices from each processor
5048 
5049     Collective
5050 
5051    Input Parameters:
5052 +    comm - the communicator the parallel matrix will live on
5053 .    seqmat - the input sequential matrix
5054 .    m - number of local rows (or PETSC_DECIDE)
5055 .    n - number of local columns (or PETSC_DECIDE)
5056 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5057 
5058    Output Parameter:
5059 .    mpimat - the parallel matrix generated
5060 
5061     Level: advanced
5062 
5063    Notes:
5064      The dimensions of the sequential matrix on each process MUST be the same.
5065      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
5066      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5067 @*/
5068 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5069 {
5070   PetscErrorCode ierr;
5071   PetscMPIInt    size;
5072 
5073   PetscFunctionBegin;
5074   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5075   if (size == 1) {
5076     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5077     if (scall == MAT_INITIAL_MATRIX) {
5078       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5079     } else {
5080       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5081     }
5082     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5083     PetscFunctionReturn(0);
5084   }
5085   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5086   if (scall == MAT_INITIAL_MATRIX) {
5087     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5088   }
5089   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5090   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
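/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ(); illustrative only: the names A_seq and
   A_mpi, the sizes, and the single inserted value are hypothetical, and error handling
   follows the conventions of this file:

     Mat            A_seq,A_mpi;
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,10,10,3,NULL,&A_seq);CHKERRQ(ierr);
     ierr = MatSetValue(A_seq,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);CHKERRQ(ierr);

   after updating the values of A_seq (same nonzero pattern) the parallel sum can be refreshed with

     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A_mpi);CHKERRQ(ierr);
     ierr = MatDestroy(&A_mpi);CHKERRQ(ierr);

   See the Notes above regarding the ownership of seqmat.
*/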
5093 
5094 /*@
5095      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5096           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5097           with MatGetSize()
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 
5105    Output Parameter:
5106 .    A_loc - the local sequential matrix generated
5107 
5108     Level: developer
5109 
5110 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5111 
5112 @*/
5113 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5114 {
5115   PetscErrorCode ierr;
5116   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5117   Mat_SeqAIJ     *mat,*a,*b;
5118   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5119   MatScalar      *aa,*ba,*cam;
5120   PetscScalar    *ca;
5121   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5122   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5123   PetscBool      match;
5124   MPI_Comm       comm;
5125   PetscMPIInt    size;
5126 
5127   PetscFunctionBegin;
5128   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5129   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5130   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5131   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5132   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5133 
5134   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5135   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5136   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5137   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5138   aa = a->a; ba = b->a;
5139   if (scall == MAT_INITIAL_MATRIX) {
5140     if (size == 1) {
5141       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5142       PetscFunctionReturn(0);
5143     }
5144 
5145     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5146     ci[0] = 0;
5147     for (i=0; i<am; i++) {
5148       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5149     }
5150     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5151     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5152     k    = 0;
5153     for (i=0; i<am; i++) {
5154       ncols_o = bi[i+1] - bi[i];
5155       ncols_d = ai[i+1] - ai[i];
5156       /* off-diagonal portion of A, columns before the diagonal block */
5157       for (jo=0; jo<ncols_o; jo++) {
5158         col = cmap[*bj];
5159         if (col >= cstart) break;
5160         cj[k]   = col; bj++;
5161         ca[k++] = *ba++;
5162       }
5163       /* diagonal portion of A */
5164       for (j=0; j<ncols_d; j++) {
5165         cj[k]   = cstart + *aj++;
5166         ca[k++] = *aa++;
5167       }
5168       /* off-diagonal portion of A, columns after the diagonal block */
5169       for (j=jo; j<ncols_o; j++) {
5170         cj[k]   = cmap[*bj++];
5171         ca[k++] = *ba++;
5172       }
5173     }
5174     /* put together the new matrix */
5175     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5176     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5177     /* Since these are PETSc arrays, change flags to free them as necessary. */
5178     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5179     mat->free_a  = PETSC_TRUE;
5180     mat->free_ij = PETSC_TRUE;
5181     mat->nonew   = 0;
5182   } else if (scall == MAT_REUSE_MATRIX) {
5183     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5184     ci = mat->i; cj = mat->j; cam = mat->a;
5185     for (i=0; i<am; i++) {
5186       /* off-diagonal portion of A, columns before the diagonal block */
5187       ncols_o = bi[i+1] - bi[i];
5188       for (jo=0; jo<ncols_o; jo++) {
5189         col = cmap[*bj];
5190         if (col >= cstart) break;
5191         *cam++ = *ba++; bj++;
5192       }
5193       /* diagonal portion of A */
5194       ncols_d = ai[i+1] - ai[i];
5195       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5196       /* off-diagonal portion of A, columns after the diagonal block */
5197       for (j=jo; j<ncols_o; j++) {
5198         *cam++ = *ba++; bj++;
5199       }
5200     }
5201   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5202   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5203   PetscFunctionReturn(0);
5204 }
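/*
   Usage sketch for MatMPIAIJGetLocalMat(); A is assumed to be an assembled MATMPIAIJ
   matrix and the name A_loc is hypothetical:

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);

   A_loc then has the local row count of A and the global column count of A; after the
   values of A change (same nonzero pattern), the copy can be refreshed in place with

     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/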
5205 
5206 /*@C
5207      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5208 
5209     Not Collective
5210 
5211    Input Parameters:
5212 +    A - the matrix
5213 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5214 -    row, col - index sets of rows and columns to extract (or NULL)
5215 
5216    Output Parameter:
5217 .    A_loc - the local sequential matrix generated
5218 
5219     Level: developer
5220 
5221 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5222 
5223 @*/
5224 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5225 {
5226   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5227   PetscErrorCode ierr;
5228   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5229   IS             isrowa,iscola;
5230   Mat            *aloc;
5231   PetscBool      match;
5232 
5233   PetscFunctionBegin;
5234   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5235   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5236   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5237   if (!row) {
5238     start = A->rmap->rstart; end = A->rmap->rend;
5239     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5240   } else {
5241     isrowa = *row;
5242   }
5243   if (!col) {
5244     start = A->cmap->rstart;
5245     cmap  = a->garray;
5246     nzA   = a->A->cmap->n;
5247     nzB   = a->B->cmap->n;
5248     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5249     ncols = 0;
5250     for (i=0; i<nzB; i++) {
5251       if (cmap[i] < start) idx[ncols++] = cmap[i];
5252       else break;
5253     }
5254     imark = i;
5255     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5256     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5257     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5258   } else {
5259     iscola = *col;
5260   }
5261   if (scall != MAT_INITIAL_MATRIX) {
5262     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5263     aloc[0] = *A_loc;
5264   }
5265   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5266   if (!col) { /* attach global id of condensed columns */
5267     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5268   }
5269   *A_loc = aloc[0];
5270   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5271   if (!row) {
5272     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5273   }
5274   if (!col) {
5275     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5276   }
5277   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5278   PetscFunctionReturn(0);
5279 }
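/*
   Usage sketch for MatMPIAIJGetLocalMatCondensed(); passing NULL for row and col keeps
   all local rows and only the nonzero columns, as described above (A and A_loc are
   hypothetical names):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/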
5280 
5281 /*
5282  * Destroy a matrix that may have PetscSF communication objects composed with it.
5283  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5284  * */
5285 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5286 {
5287   PetscSF          sf,osf;
5288   IS               map;
5289   PetscErrorCode   ierr;
5290 
5291   PetscFunctionBegin;
5292   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5293   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5294   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5295   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5296   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5297   ierr = ISDestroy(&map);CHKERRQ(ierr);
5298   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5299   PetscFunctionReturn(0);
5300 }
5301 
5302 /*
5303  * Create a sequential AIJ matrix based on row indices: a whole row is extracted once a row index is matched.
5304  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5305  * on a global size.
5306  * */
5307 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5308 {
5309   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5310   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5311   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
5312   PetscSFNode              *iremote,*oiremote;
5313   const PetscInt           *lrowindices;
5314   PetscErrorCode           ierr;
5315   PetscSF                  sf,osf;
5316   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5317   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5318   MPI_Comm                 comm;
5319   ISLocalToGlobalMapping   mapping;
5320 
5321   PetscFunctionBegin;
5322   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5323   /* plocalsize is the number of roots
5324    * nrows is the number of leaves
5325    * */
5326   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5327   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5328   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5329   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5330   for (i=0;i<nrows;i++) {
5331     /* Find a remote index and an owner for a row
5332      * The row could be local or remote
5333      * */
5334     owner = 0;
5335     lidx  = 0;
5336     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5337     iremote[i].index = lidx;
5338     iremote[i].rank  = owner;
5339   }
5340   /* Create an SF to communicate how many nonzero columns each row has */
5341   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5342   /* The SF will figure out the number of nonzero columns for each row, and their
5343    * offsets
5344    * */
5345   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5346   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5347   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5348 
5349   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5350   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5351   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5352   roffsets[0] = 0;
5353   roffsets[1] = 0;
5354   for (i=0;i<plocalsize;i++) {
5355     /* diag */
5356     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5357     /* off diag */
5358     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5359     /* compute offsets so that we know the relative location of each row */
5360     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5361     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5362   }
5363   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5364   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5365   /* 'r' means root, and 'l' means leaf */
5366   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5367   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5368   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5369   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5370   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5371   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5372   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5373   dntotalcols = 0;
5374   ontotalcols = 0;
5375   ncol = 0;
5376   for (i=0;i<nrows;i++) {
5377     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5378     ncol = PetscMax(pnnz[i],ncol);
5379     /* diag */
5380     dntotalcols += nlcols[i*2+0];
5381     /* off diag */
5382     ontotalcols += nlcols[i*2+1];
5383   }
5384   /* We do not need to figure out the exact number of columns,
5385    * since all the calculations will be done by going through the raw data
5386    * */
5387   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5388   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5389   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5390   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5391   /* diag */
5392   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5393   /* off diag */
5394   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5395   /* diag */
5396   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5397   /* off diag */
5398   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5399   dntotalcols = 0;
5400   ontotalcols = 0;
5401   ntotalcols  = 0;
5402   for (i=0;i<nrows;i++) {
5403     owner = 0;
5404     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5405     /* Set iremote for diag matrix */
5406     for (j=0;j<nlcols[i*2+0];j++) {
5407       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5408       iremote[dntotalcols].rank    = owner;
5409       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5410       ilocal[dntotalcols++]        = ntotalcols++;
5411     }
5412     /* off diag */
5413     for (j=0;j<nlcols[i*2+1];j++) {
5414       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5415       oiremote[ontotalcols].rank    = owner;
5416       oilocal[ontotalcols++]        = ntotalcols++;
5417     }
5418   }
5419   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5420   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5421   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5422   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5423   /* P serves as the roots and P_oth as the leaves
5424    * Diagonal portion
5425    * */
5426   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5427   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5428   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5429 
5430   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5431   /* Off diag */
5432   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5433   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5434   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5435   /* We operate on the matrix's internal data to save memory */
5436   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5437   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5438   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5439   /* Convert to global indices for diag matrix */
5440   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5441   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5442   /* We want P_oth to store global indices */
5443   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5444   /* Use a memory-scalable approach */
5445   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5446   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5447   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5448   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5449   /* Convert back to local indices */
5450   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5451   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5452   nout = 0;
5453   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5454   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5455   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5456   /* Exchange values */
5457   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5458   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5459   /* Stop PETSc from shrinking memory */
5460   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5461   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5462   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5463   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5464   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5465   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5466   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5467   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5468   PetscFunctionReturn(0);
5469 }
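/*
   The routine above is built on the usual PetscSF root-to-leaf broadcast idiom; a
   reduced sketch of that idiom (the names nroots, nleaves, rootdata, and leafdata are
   placeholders, not variables of this file):

     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

   Here P supplies the roots (its locally owned rows and their entries) and P_oth the
   leaves, which is why the two SFs (diagonal and off-diagonal) can later be reused to
   refresh only the numerical values.
*/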
5470 
5471 /*
5472  * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of the local A
5473  * This supports MPIAIJ and MAIJ
5474  * */
5475 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5476 {
5477   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5478   Mat_SeqAIJ            *p_oth;
5479   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5480   IS                    rows,map;
5481   PetscHMapI            hamp;
5482   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5483   MPI_Comm              comm;
5484   PetscSF               sf,osf;
5485   PetscBool             has;
5486   PetscErrorCode        ierr;
5487 
5488   PetscFunctionBegin;
5489   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5490   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5491   /* If it is the first time, create an index set of the off-diagonal nonzero columns of A,
5492    *  and then create a submatrix (which is often an overlapping matrix)
5493    * */
5494   if (reuse==MAT_INITIAL_MATRIX) {
5495     /* Use a hash table to figure out unique keys */
5496     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5497     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5498     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5499     count = 0;
5500     /* Assume that a->garray is sorted, otherwise the following does not make sense */
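    /* Hypothetical example: with dof=2 and a->garray = {2,3,6,7} the keys are {1,1,3,3},
       so mapping becomes {0,0,1,1} and count ends up as 2 */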
5501     for (i=0;i<a->B->cmap->n;i++) {
5502       key  = a->garray[i]/dof;
5503       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5504       if (!has) {
5505         mapping[i] = count;
5506         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5507       } else {
5508         /* The current 'i' maps to the same key as the previous one */
5509         mapping[i] = count-1;
5510       }
5511     }
5512     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5513     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5514     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5515     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5516     off = 0;
5517     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5518     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5519     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5520     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5521     /* In case the matrix was already created but the user wants to recreate it */
5522     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5523     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5524     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5525     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5526   } else if (reuse==MAT_REUSE_MATRIX) {
5527     /* If the matrix was already created, we simply update the values using the SF objects
5528      * that were attached to the matrix earlier.
5529      *  */
5530     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5531     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5532     if (!sf || !osf) {
5533       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5534     }
5535     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5536     /* Update values in place */
5537     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5538     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5539     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5540     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5541   } else {
5542     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5543   }
5544   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5545   PetscFunctionReturn(0);
5546 }
5547 
5548 /*@C
5549     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns of the local A
5550 
5551     Collective on Mat
5552 
5553    Input Parameters:
5554 +    A,B - the matrices in mpiaij format
5555 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5556 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5557 
5558    Output Parameters:
5559 +    rowb, colb - index sets of rows and columns of B to extract
5560 -    B_seq - the sequential matrix generated
5561 
5562     Level: developer
5563 
5564 @*/
5565 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5566 {
5567   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5568   PetscErrorCode ierr;
5569   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5570   IS             isrowb,iscolb;
5571   Mat            *bseq=NULL;
5572 
5573   PetscFunctionBegin;
5574   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5575     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5576   }
5577   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5578 
5579   if (scall == MAT_INITIAL_MATRIX) {
5580     start = A->cmap->rstart;
5581     cmap  = a->garray;
5582     nzA   = a->A->cmap->n;
5583     nzB   = a->B->cmap->n;
5584     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5585     ncols = 0;
5586     for (i=0; i<nzB; i++) {  /* rows of B below the local row range */
5587       if (cmap[i] < start) idx[ncols++] = cmap[i];
5588       else break;
5589     }
5590     imark = i;
5591     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5592     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local row range */
5593     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5594     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5595   } else {
5596     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5597     isrowb  = *rowb; iscolb = *colb;
5598     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5599     bseq[0] = *B_seq;
5600   }
5601   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5602   *B_seq = bseq[0];
5603   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5604   if (!rowb) {
5605     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5606   } else {
5607     *rowb = isrowb;
5608   }
5609   if (!colb) {
5610     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5611   } else {
5612     *colb = iscolb;
5613   }
5614   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
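/*
   Usage sketch for MatGetBrowsOfAcols(); A and B are assumed to be assembled MATMPIAIJ
   matrices with compatible layouts, and the variable names are hypothetical. The index
   sets produced by the first call must be passed back for MAT_REUSE_MATRIX:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/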
5617 
5618 /*
5619     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to nonzero columns
5620     of the OFF-DIAGONAL portion of local A
5621 
5622     Collective on Mat
5623 
5624    Input Parameters:
5625 +    A,B - the matrices in mpiaij format
5626 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5627 
5628    Output Parameters:
5629 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5630 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5631 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5632 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5633 
5634     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5635      for this matrix. This is not desirable.
5636 
5637     Level: developer
5638 
5639 */
5640 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5641 {
5642   PetscErrorCode         ierr;
5643   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5644   Mat_SeqAIJ             *b_oth;
5645   VecScatter             ctx;
5646   MPI_Comm               comm;
5647   const PetscMPIInt      *rprocs,*sprocs;
5648   const PetscInt         *srow,*rstarts,*sstarts;
5649   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5650   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5651   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5652   MPI_Request            *rwaits = NULL,*swaits = NULL;
5653   MPI_Status             rstatus;
5654   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5655 
5656   PetscFunctionBegin;
5657   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5659 
5660   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5661     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5662   }
5663   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5664   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5665 
5666   if (size == 1) {
5667     if (startsj_s) *startsj_s = NULL; /* write through the pointers: assigning to the parameters themselves has no effect in the caller */
5668     if (bufa_ptr)  *bufa_ptr  = NULL;
5669     *B_oth    = NULL;
5670     PetscFunctionReturn(0);
5671   }
5672 
5673   ctx = a->Mvctx;
5674   tag = ((PetscObject)ctx)->tag;
5675   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5676   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5677   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5678   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5679   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5680   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5681   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5682   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5683 
5684   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5685   if (scall == MAT_INITIAL_MATRIX) {
5686     /* i-array */
5687     /*---------*/
5688     /*  post receives */
5689     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5690     for (i=0; i<nrecvs; i++) {
5691       rowlen = rvalues + rstarts[i]*rbs;
5692       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5693       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5694     }
5695 
5696     /* pack the outgoing message */
5697     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5698 
5699     sstartsj[0] = 0;
5700     rstartsj[0] = 0;
5701     len         = 0; /* total length of j or a array to be sent */
5702     if (nsends) {
5703       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5704       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5705     }
5706     for (i=0; i<nsends; i++) {
5707       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5708       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5709       for (j=0; j<nrows; j++) {
5710         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5711         for (l=0; l<sbs; l++) {
5712           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5713 
5714           rowlen[j*sbs+l] = ncols;
5715 
5716           len += ncols;
5717           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5718         }
5719         k++;
5720       }
5721       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5722 
5723       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5724     }
5725     /* recvs and sends of i-array are completed */
5726     i = nrecvs;
5727     while (i--) {
5728       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5729     }
5730     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5731     ierr = PetscFree(svalues);CHKERRQ(ierr);
5732 
5733     /* allocate buffers for sending j and a arrays */
5734     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5735     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5736 
5737     /* create i-array of B_oth */
5738     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5739 
5740     b_othi[0] = 0;
5741     len       = 0; /* total length of j or a array to be received */
5742     k         = 0;
5743     for (i=0; i<nrecvs; i++) {
5744       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5745       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5746       for (j=0; j<nrows; j++) {
5747         b_othi[k+1] = b_othi[k] + rowlen[j];
5748         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5749         k++;
5750       }
5751       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5752     }
5753     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5754 
5755     /* allocate space for the j and a arrays of B_oth */
5756     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5757     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5758 
5759     /* j-array */
5760     /*---------*/
5761     /*  post receives of j-array */
5762     for (i=0; i<nrecvs; i++) {
5763       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5764       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5765     }
5766 
5767     /* pack the outgoing message j-array */
5768     if (nsends) k = sstarts[0];
5769     for (i=0; i<nsends; i++) {
5770       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5771       bufJ  = bufj+sstartsj[i];
5772       for (j=0; j<nrows; j++) {
5773         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5774         for (ll=0; ll<sbs; ll++) {
5775           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5776           for (l=0; l<ncols; l++) {
5777             *bufJ++ = cols[l];
5778           }
5779           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5780         }
5781       }
5782       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5783     }
5784 
5785     /* recvs and sends of j-array are completed */
5786     i = nrecvs;
5787     while (i--) {
5788       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5789     }
5790     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5791   } else if (scall == MAT_REUSE_MATRIX) {
5792     sstartsj = *startsj_s;
5793     rstartsj = *startsj_r;
5794     bufa     = *bufa_ptr;
5795     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5796     b_otha   = b_oth->a;
5797   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix B does not possess an object container");
5798 
5799   /* a-array */
5800   /*---------*/
5801   /*  post receives of a-array */
5802   for (i=0; i<nrecvs; i++) {
5803     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5804     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5805   }
5806 
5807   /* pack the outgoing message a-array */
5808   if (nsends) k = sstarts[0];
5809   for (i=0; i<nsends; i++) {
5810     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5811     bufA  = bufa+sstartsj[i];
5812     for (j=0; j<nrows; j++) {
5813       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5814       for (ll=0; ll<sbs; ll++) {
5815         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5816         for (l=0; l<ncols; l++) {
5817           *bufA++ = vals[l];
5818         }
5819         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5820       }
5821     }
5822     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5823   }
5824   /* recvs and sends of a-array are completed */
5825   i = nrecvs;
5826   while (i--) {
5827     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5828   }
5829   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5830   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5831 
5832   if (scall == MAT_INITIAL_MATRIX) {
5833     /* put together the new matrix */
5834     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5835 
5836     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5837     /* Since these are PETSc arrays, change flags to free them as necessary. */
5838     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5839     b_oth->free_a  = PETSC_TRUE;
5840     b_oth->free_ij = PETSC_TRUE;
5841     b_oth->nonew   = 0;
5842 
5843     ierr = PetscFree(bufj);CHKERRQ(ierr);
5844     if (!startsj_s || !bufa_ptr) {
5845       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5846       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the local buffer, which is not returned to the caller in this branch */
5847     } else {
5848       *startsj_s = sstartsj;
5849       *startsj_r = rstartsj;
5850       *bufa_ptr  = bufa;
5851     }
5852   }
5853 
5854   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5855   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5856   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5857   PetscFunctionReturn(0);
5858 }
5859 
5860 /*@C
5861   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5862 
5863   Not Collective
5864 
5865   Input Parameter:
5866 . A - The matrix in mpiaij format
5867 
5868   Output Parameters:
5869 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5870 . colmap - A map from global column index to local index into lvec
5871 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5872 
5873   Level: developer
5874 
5875 @*/
5876 #if defined(PETSC_USE_CTABLE)
5877 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5878 #else
5879 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5880 #endif
5881 {
5882   Mat_MPIAIJ *a;
5883 
5884   PetscFunctionBegin;
5885   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5886   PetscValidPointer(lvec, 2);
5887   PetscValidPointer(colmap, 3);
5888   PetscValidPointer(multScatter, 4);
5889   a = (Mat_MPIAIJ*) A->data;
5890   if (lvec) *lvec = a->lvec;
5891   if (colmap) *colmap = a->colmap;
5892   if (multScatter) *multScatter = a->Mvctx;
5893   PetscFunctionReturn(0);
5894 }
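/*
   Usage sketch for MatGetCommunicationStructs(); the returned objects are references to
   the internals of A and should not be destroyed by the caller (names are hypothetical):

     Vec        lvec;
     VecScatter sct;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
*/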
5895 
5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5898 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5899 #if defined(PETSC_HAVE_MKL_SPARSE)
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5901 #endif
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5904 #if defined(PETSC_HAVE_ELEMENTAL)
5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5906 #endif
5907 #if defined(PETSC_HAVE_HYPRE)
5908 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5909 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5910 #endif
5911 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5912 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5913 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5914 
5915 /*
5916     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5917 
5918                n                       p                          p
5919         (              )       (              )         (                  )
5920       m (      A       )  *  n (       B      )   =   m (         C        )
5921         (              )       (              )         (                  )
5922 
5923 */
5924 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5925 {
5926   PetscErrorCode ierr;
5927   Mat            At,Bt,Ct;
5928 
5929   PetscFunctionBegin;
5930   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5931   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5932   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5933   ierr = MatDestroy(&At);CHKERRQ(ierr);
5934   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5935   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5936   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5937   PetscFunctionReturn(0);
5938 }
5939 
5940 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5941 {
5942   PetscErrorCode ierr;
5943   PetscInt       m=A->rmap->n,n=B->cmap->n;
5944   Mat            Cmat;
5945 
5946   PetscFunctionBegin;
5947   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5948   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5949   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5950   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5951   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5952   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5953   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5954   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5955 
5956   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5957 
5958   *C = Cmat;
5959   PetscFunctionReturn(0);
5960 }
5961 
5962 /* ----------------------------------------------------------------*/
5963 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5964 {
5965   PetscErrorCode ierr;
5966 
5967   PetscFunctionBegin;
5968   if (scall == MAT_INITIAL_MATRIX) {
5969     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5970     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5971     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5972   }
5973   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5974   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5975   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5976   PetscFunctionReturn(0);
5977 }
5978 
5979 /*MC
5980    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5981 
5982    Options Database Keys:
5983 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5984 
5985    Level: beginner
5986 
5987    Notes:
5988     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5989     in this case the values associated with the rows and columns one passes in are set to zero
5990     in the matrix
5991 
5992     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5993     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
5994 
5995 .seealso: MatCreateAIJ()
5996 M*/
5997 
5998 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5999 {
6000   Mat_MPIAIJ     *b;
6001   PetscErrorCode ierr;
6002   PetscMPIInt    size;
6003 
6004   PetscFunctionBegin;
6005   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6006 
6007   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6008   B->data       = (void*)b;
6009   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6010   B->assembled  = PETSC_FALSE;
6011   B->insertmode = NOT_SET_VALUES;
6012   b->size       = size;
6013 
6014   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6015 
6016   /* build cache for off-process entries formed */
6017   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6018 
6019   b->donotstash  = PETSC_FALSE;
6020   b->colmap      = 0;
6021   b->garray      = 0;
6022   b->roworiented = PETSC_TRUE;
6023 
6024   /* stuff used for matrix vector multiply */
6025   b->lvec  = NULL;
6026   b->Mvctx = NULL;
6027 
6028   /* stuff for MatGetRow() */
6029   b->rowindices   = 0;
6030   b->rowvalues    = 0;
6031   b->getrowactive = PETSC_FALSE;
6032 
6033   /* flexible pointer used in CUSP/CUSPARSE classes */
6034   b->spptr = NULL;
6035 
6036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6046 #if defined(PETSC_HAVE_MKL_SPARSE)
6047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6048 #endif
6049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6052 #if defined(PETSC_HAVE_ELEMENTAL)
6053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6054 #endif
6055 #if defined(PETSC_HAVE_HYPRE)
6056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6057 #endif
6058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6063 #if defined(PETSC_HAVE_HYPRE)
6064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6065 #endif
6066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6067   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6068   PetscFunctionReturn(0);
6069 }
6070 
6071 /*@C
6072      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6073          and "off-diagonal" part of the matrix in CSR format.
6074 
6075    Collective
6076 
6077    Input Parameters:
6078 +  comm - MPI communicator
6079 .  m - number of local rows (Cannot be PETSC_DECIDE)
6080 .  n - This value should be the same as the local size used in creating the
6081        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
6082        calculated if N is given) For square matrices n is almost always m.
6083 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6084 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6085 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6086 .   j - column indices
6087 .   a - matrix values
6088 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6089 .   oj - column indices
6090 -   oa - matrix values
6091 
6092    Output Parameter:
6093 .   mat - the matrix
6094 
6095    Level: advanced
6096 
6097    Notes:
6098        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6099        must free the arrays once the matrix has been destroyed and not before.
6100 
6101        The i and j indices are 0-based
6102 
6103        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6104 
6105        This sets local rows and cannot be used to set off-processor values.
6106 
6107        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6108        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6109        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6110        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6111        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6112        communication if it is known that only local entries will be set.
6113 
6114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6115           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6116 @*/
6117 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6118 {
6119   PetscErrorCode ierr;
6120   Mat_MPIAIJ     *maij;
6121 
6122   PetscFunctionBegin;
6123   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
6124   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6125   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6126   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6127   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6128   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6129   maij = (Mat_MPIAIJ*) (*mat)->data;
6130 
6131   (*mat)->preallocated = PETSC_TRUE;
6132 
6133   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6134   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6135 
6136   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6137   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6138 
6139   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6140   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6141   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6142   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6143 
6144   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6145   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6146   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6147   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6148   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6149   PetscFunctionReturn(0);
6150 }
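/*
   A minimal two-process sketch for MatCreateMPIAIJWithSplitArrays(); the array contents
   are illustrative. Each process owns one row of the 2x2 matrix [2 -1; -1 2]; note that
   j holds LOCAL column indices of the diagonal block while oj holds GLOBAL column
   indices, and that all arrays must stay valid until the matrix is destroyed:

     Mat         A;
     PetscInt    i[2]  = {0,1},j[1] = {0},oi[2] = {0,1},oj[1];
     PetscScalar a[1]  = {2.0},oa[1] = {-1.0};
     PetscMPIInt rank;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/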
6151 
6152 /*
6153     Special version for direct calls from Fortran
6154 */
6155 #include <petsc/private/fortranimpl.h>
6156 
6157 /* Change these macros so they can be used in a void function */
6158 #undef CHKERRQ
6159 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6160 #undef SETERRQ2
6161 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6162 #undef SETERRQ3
6163 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6164 #undef SETERRQ
6165 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6166 
6167 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6168 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6169 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6170 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6171 #else
6172 #endif
6173 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6174 {
6175   Mat            mat  = *mmat;
6176   PetscInt       m    = *mm, n = *mn;
6177   InsertMode     addv = *maddv;
6178   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6179   PetscScalar    value;
6180   PetscErrorCode ierr;
6181 
6182   MatCheckPreallocated(mat,1);
6183   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6184 
6185 #if defined(PETSC_USE_DEBUG)
6186   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6187 #endif
6188   {
6189     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6190     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6191     PetscBool roworiented = aij->roworiented;
6192 
6193     /* Some Variables required in the macro */
6194     Mat        A                 = aij->A;
6195     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6196     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6197     MatScalar  *aa               = a->a;
6198     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6199     Mat        B                 = aij->B;
6200     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6201     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6202     MatScalar  *ba               = b->a;
6203 
6204     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6205     PetscInt  nonew = a->nonew;
6206     MatScalar *ap1,*ap2;
6207 
6208     PetscFunctionBegin;
6209     for (i=0; i<m; i++) {
6210       if (im[i] < 0) continue;
6211 #if defined(PETSC_USE_DEBUG)
6212       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6213 #endif
6214       if (im[i] >= rstart && im[i] < rend) {
6215         row      = im[i] - rstart;
6216         lastcol1 = -1;
6217         rp1      = aj + ai[row];
6218         ap1      = aa + ai[row];
6219         rmax1    = aimax[row];
6220         nrow1    = ailen[row];
6221         low1     = 0;
6222         high1    = nrow1;
6223         lastcol2 = -1;
6224         rp2      = bj + bi[row];
6225         ap2      = ba + bi[row];
6226         rmax2    = bimax[row];
6227         nrow2    = bilen[row];
6228         low2     = 0;
6229         high2    = nrow2;
6230 
6231         for (j=0; j<n; j++) {
6232           if (roworiented) value = v[i*n+j];
6233           else value = v[i+j*m];
6234           if (in[j] >= cstart && in[j] < cend) {
6235             col = in[j] - cstart;
6236             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6237             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6238           } else if (in[j] < 0) continue;
6239 #if defined(PETSC_USE_DEBUG)
6240           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6241           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6242 #endif
6243           else {
6244             if (mat->was_assembled) {
6245               if (!aij->colmap) {
6246                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6247               }
6248 #if defined(PETSC_USE_CTABLE)
6249               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6250               col--;
6251 #else
6252               col = aij->colmap[in[j]] - 1;
6253 #endif
6254               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6255               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6256                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6257                 col  =  in[j];
6258                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6259                 B     = aij->B;
6260                 b     = (Mat_SeqAIJ*)B->data;
6261                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6262                 rp2   = bj + bi[row];
6263                 ap2   = ba + bi[row];
6264                 rmax2 = bimax[row];
6265                 nrow2 = bilen[row];
6266                 low2  = 0;
6267                 high2 = nrow2;
6268                 bm    = aij->B->rmap->n;
6269                 ba    = b->a;
6270               }
6271             } else col = in[j];
6272             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6273           }
6274         }
6275       } else if (!aij->donotstash) {
6276         if (roworiented) {
6277           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6278         } else {
6279           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6280         }
6281       }
6282     }
6283   }
6284   PetscFunctionReturnVoid();
6285 }
6286