xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8f8f2f0d3bbfe99bf6fe84d4337dd6d7e3a5b04f)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
24    enough exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
/*
   Builds an index set of the locally owned rows that contain at least one
   numerically nonzero entry (in either the diagonal block A or the
   off-diagonal block B).  If no process owns an entirely-zero row, *keptrows
   is left NULL to signal that every row is kept.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* First pass: count local rows with no nonzero value; a row is "zero" if it
     is structurally empty or if all its stored values compare equal to 0.0 */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;   /* row has a nonzero; stop scanning it */
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* Global count of zero rows; if none anywhere, return with *keptrows NULL */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* Second pass: record the global index of each row with a nonzero value */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* The IS takes ownership of rows (PETSC_OWN_POINTER): do not free it here */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
/*
   Computes the requested norm of every global column of the matrix.
   Each process accumulates contributions from its diagonal block (local
   columns shifted by cmap->rstart) and its off-diagonal block (compressed
   columns mapped back to global indices through garray) into a length-N
   work array, which is then combined across processes with an Allreduce.
   norms must have length equal to the global number of columns.
*/
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; note |a*a| == |a|^2 also for complex scalars */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    /* accumulate |a| per column */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    /* track the per-column maximum of |a| */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  /* MAX combines infinity norms correctly; SUM combines 1-norms and squared 2-norms */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* the reduction summed squares; take the square root at the end */
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
199 
/*
   Builds an IS of locally owned rows that contain an entry outside the
   (block) diagonal: the union of the off-block-diagonal rows of the local
   diagonal block A and the rows of the off-diagonal block B that have a
   nonzero.  Duplicates are removed and indices shifted to global numbering.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* concatenate both index lists, then sort and remove duplicates */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  /* shift local row indices to global numbering; the IS takes ownership of iis */
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* the global matrix is only inspected on rank 0; it must be SEQAIJ there */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the distributed matrix with m local rows (square, so m local columns) */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    /* block sizes are only valid on rank 0; broadcast them to everyone */
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather everyone's local row count and turn it into ownership offsets */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts
         entries strictly left of the diagonal block in row i */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 reads its own slice directly from the sequential matrix */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, so
       subtract the off-diagonal counts to get the diagonal counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* non-root ranks allocated receive buffers; rank 0 aliased gmat's arrays */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash the left-of-diagonal counts for the MAT_REUSE_MATRIX path */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* keep the original pointer: gmataa is advanced during the copy below */
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row's values arrive ordered as [left-of-diagonal | diagonal | right-of-diagonal],
       with ld[] giving the left-of-diagonal counts recorded at creation */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right part of previous row plus left part of this row, then this row's diagonal part */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    /* remaining right-of-diagonal part of the last row */
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order-N integer array) but access is fast.
408 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: store global+1 -> local+1 so that 0 can mean "absent" */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense variant: a length-N array indexed by global column; entries are
     local+1 and zero (from PetscCalloc1) marks columns not present locally */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
429 
/*
   Inserts or adds one value at (row,col) into the diagonal block's CSR arrays
   (rp1/ap1, bounds low1/high1, current length nrow1).  Binary-search narrows
   the range, then a linear scan finds the column; if absent and insertion is
   allowed, later entries are shifted up (reallocating via
   MatSeqXAIJReallocateAIJ when the row is full).  orow/ocol are the original
   global indices, used only in the error message.  Relies on many locals
   declared in MatSetValues_MPIAIJ; not a general-purpose macro.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow dow the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
466 
/*
   Off-diagonal-block counterpart of MatSetValues_SeqAIJ_A_Private: same
   search/insert logic, operating on the B block's arrays (rp2/ap2, bounds
   low2/high2, length nrow2).  Note the zero-value skip here does not test
   row != col, since off-diagonal entries are never on the matrix diagonal.
   Relies on locals declared in MatSetValues_MPIAIJ.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
502 
/*
   Copies a full row of values into an already-assembled matrix.  The input
   array v is assumed to hold the row's nonzeros in global column order:
   entries left of the diagonal block, then the diagonal block, then entries
   right of it — matching the existing nonzero structure exactly (no pattern
   changes).  row is a global row index owned by this process.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row index */
  /* l counts B-block entries whose global column is before the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
528 
/*
   MatSetValues() implementation for MPIAIJ.  Locally-owned rows are routed
   to the diagonal block A (columns inside [cstart,cend)) or the off-diagonal
   block B (all other columns, translated through colmap when the matrix was
   previously assembled) via the fast insertion macros above.  Rows owned by
   other processes are placed in the stash for communication at assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal block column */
          if (mat->was_assembled) {
            /* translate the global column through the colmap built at assembly */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;   /* colmap stores local+1; col < 0 now means "not present" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal location: disassemble B back to global indices */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              /* location absent and insertion disallowed: skip or error per nonew */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash for exchange during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
638 
639 /*
640     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
641     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
642     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
643 */
644 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
645 {
646   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
647   Mat            A           = aij->A; /* diagonal part of the matrix */
648   Mat            B           = aij->B; /* offdiagonal part of the matrix */
649   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
650   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
651   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
652   PetscInt       *ailen      = a->ilen,*aj = a->j;
653   PetscInt       *bilen      = b->ilen,*bj = b->j;
654   PetscInt       am          = aij->A->rmap->n,j;
655   PetscInt       diag_so_far = 0,dnz;
656   PetscInt       offd_so_far = 0,onz;
657 
658   PetscFunctionBegin;
659   /* Iterate over all rows of the matrix */
660   for (j=0; j<am; j++) {
661     dnz = onz = 0;
662     /*  Iterate over all non-zero columns of the current row */
663     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
664       /* If column is in the diagonal */
665       if (mat_j[col] >= cstart && mat_j[col] < cend) {
666         aj[diag_so_far++] = mat_j[col] - cstart;
667         dnz++;
668       } else { /* off-diagonal entries */
669         bj[offd_so_far++] = mat_j[col];
670         onz++;
671       }
672     }
673     ailen[j] = dnz;
674     bilen[j] = onz;
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 /*
680     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
681     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
682     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
683     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
684     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
685 */
686 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
687 {
688   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
689   Mat            A      = aij->A; /* diagonal part of the matrix */
690   Mat            B      = aij->B; /* offdiagonal part of the matrix */
691   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
692   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
693   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
694   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
695   PetscInt       *ailen = a->ilen,*aj = a->j;
696   PetscInt       *bilen = b->ilen,*bj = b->j;
697   PetscInt       am     = aij->A->rmap->n,j;
698   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
699   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
700   PetscScalar    *aa = a->a,*ba = b->a;
701 
702   PetscFunctionBegin;
703   /* Iterate over all rows of the matrix */
704   for (j=0; j<am; j++) {
705     dnz_row = onz_row = 0;
706     rowstart_offd = full_offd_i[j];
707     rowstart_diag = full_diag_i[j];
708     /*  Iterate over all non-zero columns of the current row */
709     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
710       /* If column is in the diagonal */
711       if (mat_j[col] >= cstart && mat_j[col] < cend) {
712         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
713         aa[rowstart_diag+dnz_row] = mat_a[col];
714         dnz_row++;
715       } else { /* off-diagonal entries */
716         bj[rowstart_offd+onz_row] = mat_j[col];
717         ba[rowstart_offd+onz_row] = mat_a[col];
718         onz_row++;
719       }
720     }
721     ailen[j] = dnz_row;
722     bilen[j] = onz_row;
723   }
724   PetscFunctionReturn(0);
725 }
726 
/*
   Gets a dense m x n subblock of values; only rows owned by this process are
   supported. Locally owned columns are read from the diagonal block A;
   other columns are looked up in the colmap and read from the off-diagonal
   block B, returning 0.0 for positions not present in B's sparsity pattern.
   Negative row/column indices are silently skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* owned column: read from the diagonal block with local indexing */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-process column: map global column -> local column of B */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          /* colmap stores col+1 so that 0 can mean "not present" */
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not in B's pattern (or colmap stale): the value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
766 
767 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
768 
769 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
770 {
771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
772   PetscErrorCode ierr;
773   PetscInt       nstash,reallocs;
774 
775   PetscFunctionBegin;
776   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
777 
778   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
779   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
780   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
/*
   Completes assembly of an MPIAIJ matrix:
     1) drains the stash, inserting received off-process entries locally,
     2) assembles the diagonal block A,
     3) collectively decides whether the off-diagonal block B must be
        disassembled (so its structure can be rebuilt consistently),
     4) builds the column map / scatter context on the first final assembly,
     5) assembles B and collectively updates the matrix nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive and insert all stashed entries destined for this rank */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build colmap, garray, lvec, and the scatter context */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* discard any cached MatGetRow work arrays; they may be stale now */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* cached diagonal (if any) is invalidated by the new values */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
854 
855 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
856 {
857   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
858   PetscErrorCode ierr;
859 
860   PetscFunctionBegin;
861   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
862   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
/*
   Zeros the given (globally numbered) rows of the matrix, optionally placing
   'diag' on the diagonal and fixing the right-hand side b so that the
   solution at those rows equals x. Rows owned by other ranks are mapped to
   their owners by MatZeroRowsMapLocal_Private. The nonzero state is bumped
   collectively if any rank's sparsity pattern changed.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember the block nonzero states so we can detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local block A */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the 'nonew' flags so the temporary relaxation below can be undone */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists for rows past the last column */
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the insertion policy */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
940 
/*
   Zeros the given rows AND the corresponding columns, optionally placing
   'diag' on the diagonal and adjusting b for the known solution values in x.
   Row ownership is communicated with a PetscSF; the zeroed columns of the
   off-diagonal block are found via a 0/1 mask vector scattered into the
   ghost layout.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; /* in-place compaction: lrows[0..len) are local row indices */
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build a 0/1 mask of zeroed rows and scatter it to the ghost layout so
     each rank can recognize zeroed columns of its off-diagonal block */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* ghost values of x are needed to correct b below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are listed, via rindex */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed elsewhere: move the known contribution to the rhs */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1057 
1058 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1059 {
1060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       nt;
1063   VecScatter     Mvctx = a->Mvctx;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1067   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1068 
1069   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1071   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1072   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1077 {
1078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089   PetscErrorCode ierr;
1090   VecScatter     Mvctx = a->Mvctx;
1091 
1092   PetscFunctionBegin;
1093   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1094   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1095   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1096   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1097   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   /* do nondiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1109   /* do local part */
1110   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1111   /* add partial results together */
1112   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
/*
   Tests whether Bmat equals the transpose of Amat (to tolerance tol).
   First performs the cheap collective test on the diagonal blocks; only if
   that passes (and more than one rank exists) does it extract and compare
   the off-diagonal parts via MatCreateSubMatrices.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  /* all ranks must agree before attempting the expensive test */
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside [first,last) */
  /* NOTE(review): the allocation is sized N-last+first but the second loop
     runs to M; correct only when M == N (square matrix) — confirm callers
     never reach here with a rectangular pair */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* Aoff = A(Me,Notme) must equal Boff^T where Boff = B(Notme,Me) */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1158 
1159 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1160 {
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1169 {
1170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   /* do nondiagonal part */
1175   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1176   /* do local part */
1177   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1178   /* add partial results together */
1179   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 /*
1185   This only works correctly for square matrices where the subblock A->A is the
1186    diagonal block
1187 */
1188 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1189 {
1190   PetscErrorCode ierr;
1191   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1192 
1193   PetscFunctionBegin;
1194   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1195   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1196   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1207   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
/*
   Destroys an MPIAIJ matrix: frees the stash, both sequential blocks, the
   column map, the ghost vector and scatter contexts, cached work arrays,
   and finally detaches every composed method so a later MatSetType() can
   reuse the object cleanly.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap's representation depends on the build configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  /* the Mat_MPIAIJ struct itself must go last: 'aij' aliases it above */
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* detach all type-specific composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1257 
/*
   Writes the matrix to a binary viewer in the PETSc binary matrix format:
   a 4-entry header, then per-row lengths, then global column indices, then
   values. Rank 0 writes the file; every other rank streams its data to
   rank 0 under flow control. Within each row the entries are emitted in
   global column order: B-entries left of the diagonal block, then the
   A-entries, then the remaining B-entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz; /* local nonzero count over both blocks */
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* header: classid, global rows, global cols, global nonzeros */
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    /* non-root ranks still participate in the reduction of the nz total */
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* th processor needs space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* B entries whose global column precedes the diagonal block */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal block entries, shifted back to global numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    /* remaining B entries (columns after the diagonal block) */
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  /* NOTE(review): uses PETSC_ERR_LIB here but PETSC_ERR_PLIB for the same
     internal check on the values pass below — confirm which is intended */
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      /* NOTE(review): the condition tests rnz but the message prints nz — consider reporting rnz */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* same interleaving order as the column-index pass above */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      /* NOTE(review): same rnz/nz message mismatch as in the index pass */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  /* record the block size in the companion .info file, if one exists */
  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
1389 
1390 #include <petscdraw.h>
1391 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1392 {
1393   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1394   PetscErrorCode    ierr;
1395   PetscMPIInt       rank = aij->rank,size = aij->size;
1396   PetscBool         isdraw,iascii,isbinary;
1397   PetscViewer       sviewer;
1398   PetscViewerFormat format;
1399 
1400   PetscFunctionBegin;
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1404   if (iascii) {
1405     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1406     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1407       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1408       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1409       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410       for (i=0; i<(PetscInt)size; i++) {
1411         nmax = PetscMax(nmax,nz[i]);
1412         nmin = PetscMin(nmin,nz[i]);
1413         navg += nz[i];
1414       }
1415       ierr = PetscFree(nz);CHKERRQ(ierr);
1416       navg = navg/size;
1417       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1418       PetscFunctionReturn(0);
1419     }
1420     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1421     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1422       MatInfo   info;
1423       PetscBool inodes;
1424 
1425       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1426       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1427       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1429       if (!inodes) {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       } else {
1433         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1434                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1435       }
1436       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1437       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1438       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1440       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1442       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1443       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1446       PetscInt inodecount,inodelimit,*inodes;
1447       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1448       if (inodes) {
1449         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1450       } else {
1451         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1452       }
1453       PetscFunctionReturn(0);
1454     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1455       PetscFunctionReturn(0);
1456     }
1457   } else if (isbinary) {
1458     if (size == 1) {
1459       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1460       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1463     }
1464     PetscFunctionReturn(0);
1465   } else if (iascii && size == 1) {
1466     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1467     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1468     PetscFunctionReturn(0);
1469   } else if (isdraw) {
1470     PetscDraw draw;
1471     PetscBool isnull;
1472     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1473     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1474     if (isnull) PetscFunctionReturn(0);
1475   }
1476 
1477   { /* assemble the entire matrix onto first processor */
1478     Mat A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1484     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1485 /*  The commented code uses MatCreateSubMatrices instead */
1486 /*
1487     Mat *AA, A = NULL, Av;
1488     IS  isrow,iscol;
1489 
1490     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1491     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1492     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1493     if (!rank) {
1494        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1495        A    = AA[0];
1496        Av   = AA[0];
1497     }
1498     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1499 */
1500     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1501     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1502     /*
1503        Everyone has to call to draw the matrix since the graphics waits are
1504        synchronized across all processors that share the PetscDraw object
1505     */
1506     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1507     if (!rank) {
1508       if (((PetscObject)mat)->name) {
1509         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1510       }
1511       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1512     }
1513     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1514     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1515     ierr = MatDestroy(&A);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1521 {
1522   PetscErrorCode ierr;
1523   PetscBool      iascii,isdraw,issocket,isbinary;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1527   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1528   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1529   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1530   if (iascii || isdraw || isbinary || issocket) {
1531     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
/*
  MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

  Only "local" relaxation variants are implemented: each sweep relaxes with
  the on-process diagonal block A, and between sweeps the right-hand side is
  corrected for the off-process coupling by forming bb1 = bb - B*x, where the
  ghost values of x are obtained through the Mvctx scatter.  A fully parallel
  SOR is not supported and raises PETSC_ERR_SUP.

  Input:  matin - the matrix; bb - right-hand side; omega - relaxation factor;
          flag - sweep selection bits; fshift - diagonal shift; its - outer
          iterations; lits - local iterations per outer sweep.
  Output: xx - the iterate, updated in place.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* work vector for the corrected rhs; allocated lazily below */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper operates only on the diagonal block; forward directly */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever some sweep starts from a nonzero iterate: more than
     one iteration, no zero-initial-guess bit (note the bitwise ~), or Eisenstat */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep may use bb directly since x starts at zero (B*x = 0) */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward sweep from a zero initial guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* cache the diagonal of the matrix on first use */
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 <- bb + ((omega-2)/omega) * D*xx  (VecAYPX: y = alpha*y + x) */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* add the off-process coupling B * (ghosted xx) */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* surface any zero-pivot/factorization problem detected by the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1635 
/*
  MatPermute_MPIAIJ - builds B, a copy of A with its rows and columns reordered
  by the index sets rowp and colp.

  Strategy: the permutations give, for each target position, the wanted global
  row/column; PetscSF reductions invert them so each process learns the
  destination (rdest/cdest) of the rows and columns it owns, and a broadcast
  maps the ghost columns (garray) to their destinations (gcdest).  Preallocation
  counts (dnnz/onnz) are computed against the destination ownership and
  broadcast to the receiving rows (tdnnz/tonnz) before values are inserted.

  Input:  A - the matrix; rowp, colp - row and column permutation index sets.
  Output: B - the newly created permuted matrix (caller owns it).
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;  /* NOTE(review): never assigned in this routine; the conditional destroy below is a no-op */
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  /* work is shared by the row and column passes, hence max(m,n) entries */
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros of each permuted row, judged by
     destination row/column ownership */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that will own the permuted rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1739 
1740 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1741 {
1742   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1747   if (ghosts) *ghosts = aij->garray;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   Mat            A    = mat->A,B = mat->B;
1755   PetscErrorCode ierr;
1756   PetscReal      isend[5],irecv[5];
1757 
1758   PetscFunctionBegin;
1759   info->block_size = 1.0;
1760   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1761 
1762   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1763   isend[3] = info->memory;  isend[4] = info->mallocs;
1764 
1765   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1766 
1767   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1768   isend[3] += info->memory;  isend[4] += info->mallocs;
1769   if (flag == MAT_LOCAL) {
1770     info->nz_used      = isend[0];
1771     info->nz_allocated = isend[1];
1772     info->nz_unneeded  = isend[2];
1773     info->memory       = isend[3];
1774     info->mallocs      = isend[4];
1775   } else if (flag == MAT_GLOBAL_MAX) {
1776     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1777 
1778     info->nz_used      = irecv[0];
1779     info->nz_allocated = irecv[1];
1780     info->nz_unneeded  = irecv[2];
1781     info->memory       = irecv[3];
1782     info->mallocs      = irecv[4];
1783   } else if (flag == MAT_GLOBAL_SUM) {
1784     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1785 
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   }
1792   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1793   info->fill_ratio_needed = 0;
1794   info->factor_mallocs    = 0;
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1799 {
1800   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   switch (op) {
1805   case MAT_NEW_NONZERO_LOCATIONS:
1806   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1807   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1808   case MAT_KEEP_NONZERO_PATTERN:
1809   case MAT_NEW_NONZERO_LOCATION_ERR:
1810   case MAT_USE_INODES:
1811   case MAT_IGNORE_ZERO_ENTRIES:
1812     MatCheckPreallocated(A,1);
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_ROW_ORIENTED:
1817     MatCheckPreallocated(A,1);
1818     a->roworiented = flg;
1819 
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1822     break;
1823   case MAT_NEW_DIAGONALS:
1824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1825     break;
1826   case MAT_IGNORE_OFF_PROC_ENTRIES:
1827     a->donotstash = flg;
1828     break;
1829   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1830   case MAT_SPD:
1831   case MAT_SYMMETRIC:
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833   case MAT_HERMITIAN:
1834   case MAT_SYMMETRY_ETERNAL:
1835     break;
1836   case MAT_SUBMAT_SINGLEIS:
1837     A->submat_singleis = flg;
1838     break;
1839   case MAT_STRUCTURE_ONLY:
1840     /* The option is handled directly by MatSetOption() */
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
/*
  MatGetRow_MPIAIJ - returns one locally-owned row with global column indices,
  merging the diagonal (A) and off-diagonal (B) blocks into ascending column
  order (assuming each block's row is already sorted).

  The B part splits around the diagonal block: its entries with global column
  < cstart come first, then all of A (whose local columns are offset by
  cstart), then the remaining B entries (mapped to global via garray).
  Results point into buffers owned by the matrix; MatRestoreRow_MPIAIJ() must
  be called before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request columns and/or values from A and B only when the caller wants them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries whose global column precedes the A block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1925 
1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1927 {
1928   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1929 
1930   PetscFunctionBegin;
1931   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1932   aij->getrowactive = PETSC_FALSE;
1933   PetscFunctionReturn(0);
1934 }
1935 
/*
  MatNorm_MPIAIJ - computes the Frobenius, 1-, or infinity-norm of an MPIAIJ
  matrix by combining the stored nonzeros of the diagonal (A) and off-diagonal
  (B) blocks and reducing over the communicator.  The 2-norm is not supported.
  On a single process the computation is delegated to the sequential block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then Allreduce and take the root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column (A columns offset by cstart,
         B columns mapped through garray), sum across processes, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* each row lives entirely on one process, so a local row-sum max
         followed by a global max suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
2002 
/*
  MatTranspose_MPIAIJ - forms the transpose of an MPIAIJ matrix.

  The diagonal block is transposed locally and in place via MatTranspose()
  on the sequential block (fast: no MatSetValues).  The off-diagonal block is
  inserted through MatSetValues(): each local row i of B becomes a global
  column of the result, achieved by passing the row's (global) column indices
  as the row arguments and the single global row as the column argument.

  Input:  A - the matrix; reuse - MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX (or
          in-place when *matout == A).
  Output: matout - the transpose.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
  PetscErrorCode ierr;
  Mat            B,A_diag,*B_diag;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* the transpose has A's column layout as rows and row layout as columns */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    /* reusing a matrix: its nonzero pattern must already fit the transpose */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  /* translate B's local column indices to global once, up front */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of B as column `row` of the transpose */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* presumably the in-place (MAT_INPLACE_MATRIX) case — the result replaces A's header; TODO confirm */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2090 
2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2092 {
2093   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2094   Mat            a    = aij->A,b = aij->B;
2095   PetscErrorCode ierr;
2096   PetscInt       s1,s2,s3;
2097 
2098   PetscFunctionBegin;
2099   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2100   if (rr) {
2101     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2102     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2103     /* Overlap communication with computation. */
2104     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105   }
2106   if (ll) {
2107     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2108     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2109     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2110   }
2111   /* scale  the diagonal block */
2112   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2113 
2114   if (rr) {
2115     /* Do a scatter end and then right scale the off-diagonal block */
2116     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2117     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2123 {
2124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2152 {
2153   PetscErrorCode ierr;
2154   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2155   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2156 
2157   PetscFunctionBegin;
2158   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2159   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2160     /* because of the column compression in the off-processor part of the matrix a->B,
2161        the number of columns in a->B and b->B may be different, hence we cannot call
2162        the MatCopy() directly on the two parts. If need be, we can provide a more
2163        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2164        then copying the submatrices */
2165     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2166   } else {
2167     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2168     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2169   }
2170   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2175 {
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 /*
2184    Computes the number of nonzeros per row needed for preallocation when X and Y
2185    have different nonzero structure.
2186 */
2187 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2188 {
2189   PetscInt       i,j,k,nzx,nzy;
2190 
2191   PetscFunctionBegin;
2192   /* Set the number of nonzeros in the new matrix */
2193   for (i=0; i<m; i++) {
2194     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2195     nzx = xi[i+1] - xi[i];
2196     nzy = yi[i+1] - yi[i];
2197     nnz[i] = 0;
2198     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2199       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2200       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2201       nnz[i]++;
2202     }
2203     for (; k<nzy; k++) nnz[i]++;
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2210 {
2211   PetscErrorCode ierr;
2212   PetscInt       m = Y->rmap->N;
2213   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2214   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2222 {
2223   PetscErrorCode ierr;
2224   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2225   PetscBLASInt   bnz,one=1;
2226   Mat_SeqAIJ     *x,*y;
2227 
2228   PetscFunctionBegin;
2229   if (str == SAME_NONZERO_PATTERN) {
2230     PetscScalar alpha = a;
2231     x    = (Mat_SeqAIJ*)xx->A->data;
2232     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2233     y    = (Mat_SeqAIJ*)yy->A->data;
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     x    = (Mat_SeqAIJ*)xx->B->data;
2236     y    = (Mat_SeqAIJ*)yy->B->data;
2237     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2238     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2239     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2240   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2241     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2242   } else {
2243     Mat      B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
/* Complex-conjugate all stored entries in place; a no-op in real builds. */
PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* conjugate the diagonal and off-diagonal sequential blocks independently */
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2279 
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2298   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
/* v[i] = entry of locally owned row i with the largest absolute value; if idx is
   non-NULL, idx[i] receives that entry's global column.  Computed separately on the
   diagonal and off-diagonal blocks, then merged. */
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* max over the diagonal block; its column indices are local and need shifting */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      /* only translate rows whose max is nonzero; empty rows keep their idx as-is */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* max over the off-diagonal block, into a local scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* merge: keep whichever block produced the larger magnitude; garray maps the
     off-diagonal block's compressed column indices back to global columns */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2339 
2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->cmap->n; i++) {
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++) {
2365     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2379 {
2380   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2381   PetscInt       n      = A->rmap->n;
2382   PetscInt       cstart = A->cmap->rstart;
2383   PetscInt       *cmap  = mat->garray;
2384   PetscInt       *diagIdx, *offdiagIdx;
2385   Vec            diagV, offdiagV;
2386   PetscScalar    *a, *diagA, *offdiagA;
2387   PetscInt       r;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2394   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2395   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   for (r = 0; r < n; ++r) {
2400     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2401       a[r]   = diagA[r];
2402       idx[r] = cstart + diagIdx[r];
2403     } else {
2404       a[r]   = offdiagA[r];
2405       idx[r] = cmap[offdiagIdx[r]];
2406     }
2407   }
2408   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2409   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2411   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2412   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2413   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2457 {
2458   PetscErrorCode ierr;
2459   Mat            *dummy;
2460 
2461   PetscFunctionBegin;
2462   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2463   *newmat = *dummy;
2464   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2475   A->factorerrortype = a->A->factorerrortype;
2476   PetscFunctionReturn(0);
2477 }
2478 
/* Fill the matrix with random values and reassemble it.  For a matrix that is only
   preallocated (never assembled) the off-diagonal block is filled with a variant that
   skips the locally owned column range — presumably so those entries land in the
   diagonal block instead; confirm against MatSetRandomSkipColumnRange_SeqAIJ_Private. */
static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  /* without a nonzero pattern (assembled) or at least a preallocation there is nothing to fill */
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2496 
2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2498 {
2499   PetscFunctionBegin;
2500   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2501   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2502   PetscFunctionReturn(0);
2503 }
2504 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2518 {
2519   PetscErrorCode       ierr;
2520 
2521   PetscFunctionBegin;
2522   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2527 {
2528   PetscErrorCode       ierr;
2529   PetscBool            sc = PETSC_FALSE,flg;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2533   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2534   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2535   if (flg) {
2536     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2537   }
2538   ierr = PetscOptionsTail();CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
/* Y += a*I.  Makes sure the local diagonal block can hold a diagonal entry per row
   before delegating to the generic shift. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: minimal one-nonzero-per-row preallocation for the diagonal */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but empty diagonal block: re-preallocate while preserving the
       caller's nonew (error-on-new-nonzero) setting, which the call would reset */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2559 
2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2561 {
2562   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2567   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2568   if (d) {
2569     PetscInt rstart;
2570     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2571     *d += rstart;
2572 
2573   }
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2578 {
2579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2580   PetscErrorCode ierr;
2581 
2582   PetscFunctionBegin;
2583   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 /* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ.  The interspersed index comments mark the slot
   position within struct _MatOps; a 0 entry leaves that operation unimplemented
   for this matrix type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
2734 
2735 /* ----------------------------------------------------------------------------------------*/
2736 
2737 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2738 {
2739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2744   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2749 {
2750   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2755   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2756   PetscFunctionReturn(0);
2757 }
2758 
/* Preallocate the diagonal (b->A) and off-diagonal (b->B) sequential blocks.
   d_nz/d_nnz and o_nz/o_nnz follow the MatSeqAIJSetPreallocation() conventions for
   the two blocks respectively.  Safe to call repeatedly: communication structures
   and the off-diagonal block are rebuilt from scratch each time. */
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* discard any existing column map, ghost map, and scatter; they are rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocessor: there is no off-diagonal part, so give it zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal block keeps its sizes across calls; create it only the first time */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2803 
2804 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2805 {
2806   Mat_MPIAIJ     *b;
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2811   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2812   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2813   b = (Mat_MPIAIJ*)B->data;
2814 
2815 #if defined(PETSC_USE_CTABLE)
2816   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2817 #else
2818   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2819 #endif
2820   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2821   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2822   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2823 
2824   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2825   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2826   B->preallocated  = PETSC_TRUE;
2827   B->was_assembled = PETSC_FALSE;
2828   B->assembled = PETSC_FALSE;
2829   PetscFunctionReturn(0);
2830 }
2831 
/* Duplicates an MPIAIJ matrix: creates a new matrix of the same type/layout and copies
   (or merely structure-copies, depending on cpvalues) the diagonal block A, the
   off-diagonal block B, the column maps, and the communication objects. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* The duplicate is marked assembled/preallocated up front; A and B are duplicated
     below as already-assembled sequential matrices. */
  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow work space is allocated lazily; start with none */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* Share layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* Copy the global-to-local column map of the off-diagonal part, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* Copy the local-to-global column map (garray) of the off-diagonal part, if present;
     len+1 keeps the allocation non-empty even when B has no columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = 0;

  /* Duplicate the halo vector and the scatter used by MatMult */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* Optional MPI-1 variant of the scatter, only present if the source had one */
  if (oldmat->Mvctx_mpi1) {
    ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  /* Carry over composed functions (e.g. type-specific conversions) */
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2897 
2898 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2899 {
2900   PetscBool      isbinary, ishdf5;
2901   PetscErrorCode ierr;
2902 
2903   PetscFunctionBegin;
2904   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2905   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2906   /* force binary viewer to load .info file if it has not yet done so */
2907   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2908   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2909   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2910   if (isbinary) {
2911     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2912   } else if (ishdf5) {
2913 #if defined(PETSC_HAVE_HDF5)
2914     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2915 #else
2916     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2917 #endif
2918   } else {
2919     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2920   }
2921   PetscFunctionReturn(0);
2922 }
2923 
/* Reads an MPIAIJ matrix from a PETSc binary file.  Rank 0 owns the file descriptor
   and reads everything; it keeps its own portion and ships each other rank's row
   lengths, column indices, and values via MPIULong_Send/Recv (which handle counts
   larger than the int limit of plain MPI sends). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* Only rank 0 touches the file; header = {classid, M, N, total nnz (or <0 for special formats)} */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* Every rank needs the global sizes read by rank 0 */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  /* NOTE(review): %d above for PetscInt args differs from the %D convention used elsewhere in this file — verify with 64-bit indices */
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  /* rowners becomes the prefix sum of local row counts: rowners[r]..rowners[r+1] are rank r's rows */
  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    /* Rank 0 reads its own row lengths, then reads and forwards each other rank's,
       accumulating per-rank nonzero totals in procsnz as it goes */
    ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    /* exclusive prefix sum gives this rank's column range [cstart,cend) */
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* ourlens temporarily becomes the diagonal-block row lengths for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the value-insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3123 
3124 /* Not scalable because of ISAllGather() unless getting all columns. */
3125 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3126 {
3127   PetscErrorCode ierr;
3128   IS             iscol_local;
3129   PetscBool      isstride;
3130   PetscMPIInt    lisstride=0,gisstride;
3131 
3132   PetscFunctionBegin;
3133   /* check if we are grabbing all columns*/
3134   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3135 
3136   if (isstride) {
3137     PetscInt  start,len,mstart,mlen;
3138     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3139     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3140     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3141     if (mstart == start && mlen-mstart == len) lisstride = 1;
3142   }
3143 
3144   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3145   if (gisstride) {
3146     PetscInt N;
3147     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3148     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3149     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3150     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3151   } else {
3152     PetscInt cbs;
3153     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3154     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3155     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3156   }
3157 
3158   *isseq = iscol_local;
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 /*
3163  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3164  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3165 
3166  Input Parameters:
3167    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3170    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3171            i.e., mat->cstart <= iscol[i] < mat->cend
3172  Output Parameter:
3173    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3174    iscol_o - sequential column index set for retrieving mat->B
3175    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3176  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     entries left at -1 mark columns NOT selected by iscol */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices: exclusive prefix sum of ncols gives this rank's offset into the global iscol */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  /* Mark selected columns: x carries the global column index, cmap the position within iscol */
  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d (sequential, local column indices into mat->A); idx ownership passes to the IS */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d (sequential, local row indices into mat->A) */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: any halo entry still at -1 was not selected */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* caller takes ownership of cmap1 (freed by caller via PetscFree) */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3274 
3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3277 {
3278   PetscErrorCode ierr;
3279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3280   Mat            M = NULL;
3281   MPI_Comm       comm;
3282   IS             iscol_d,isrow_d,iscol_o;
3283   Mat            Asub = NULL,Bsub = NULL;
3284   PetscInt       n;
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3288 
3289   if (call == MAT_REUSE_MATRIX) {
3290     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3291     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3292     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3293 
3294     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3295     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3296 
3297     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3298     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3299 
3300     /* Update diagonal and off-diagonal portions of submat */
3301     asub = (Mat_MPIAIJ*)(*submat)->data;
3302     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3303     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3304     if (n) {
3305       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3306     }
3307     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3308     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3309 
3310   } else { /* call == MAT_INITIAL_MATRIX) */
3311     const PetscInt *garray;
3312     PetscInt        BsubN;
3313 
3314     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3315     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3316 
3317     /* Create local submatrices Asub and Bsub */
3318     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3319     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3320 
3321     /* Create submatrix M */
3322     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3323 
3324     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3325     asub = (Mat_MPIAIJ*)M->data;
3326 
3327     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3328     n = asub->B->cmap->N;
3329     if (BsubN > n) {
3330       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3331       const PetscInt *idx;
3332       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3333       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3334 
3335       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3336       j = 0;
3337       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3338       for (i=0; i<n; i++) {
3339         if (j >= BsubN) break;
3340         while (subgarray[i] > garray[j]) j++;
3341 
3342         if (subgarray[i] == garray[j]) {
3343           idx_new[i] = idx[j++];
3344         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3345       }
3346       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3347 
3348       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3349       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3350 
3351     } else if (BsubN < n) {
3352       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3353     }
3354 
3355     ierr = PetscFree(garray);CHKERRQ(ierr);
3356     *submat = M;
3357 
3358     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3359     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3360     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3361 
3362     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3363     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3364 
3365     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3366     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3367   }
3368   PetscFunctionReturn(0);
3369 }
3370 
/* Top-level submatrix extraction for MPIAIJ.  Chooses among three strategies:
   (a) isrow and iscol both match mat's distribution -> SameRowColDist (no hash table),
   (b) only isrow matches -> SameRowDist (requires sorted gathered iscol),
   (c) general case -> gather iscol on every rank and use the nonscalable path.
   For MAT_REUSE_MATRIX the strategy used initially is recovered from the objects
   composed on *newmat ("isrow_d", "SubIScol", "ISAllGather"). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Recover which strategy created *newmat from the objects composed on it */
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* All local isrow indices within [start,end) means isrow respects mat's row ownership */
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* Both checks must hold on every rank */
    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS    iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* Stash the gathered column IS for a future MAT_REUSE_MATRIX call */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3475 
3476 /*@C
3477      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3478          and "off-diagonal" part of the matrix in CSR format.
3479 
3480    Collective
3481 
3482    Input Parameters:
3483 +  comm - MPI communicator
3484 .  A - "diagonal" portion of matrix
3485 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3486 -  garray - global index of B columns
3487 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3491 
3492    Notes:
3493        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3494        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3495 
3496 .seealso: MatCreateMPIAIJWithSplitArrays()
3497 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;
  Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt       *oi=b->i,*oj=b->j,i,nz,col;  /* CSR arrays of B; oj[] is rewritten in place below */
  PetscScalar    *oa=b->a;
  Mat            Bnew;
  PetscInt       m,n,N;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  /* A is sequential, so its global row count must equal B's (both store the same local rows) */
  if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
  if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);

  ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* Skip MatMPIAIJSetPreallocation(): the diagonal/off-diagonal blocks are installed directly below */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Translate B's local column indices to global indices via garray, in place */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

   /* Set Bnew as off-diagonal portion of *mat */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);

  /* Transfer ownership of the i/j/a arrays from B to Bnew: clear B's free flags so
     destroying B below does not free the arrays Bnew now shares */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  ierr = MatDestroy(&B);CHKERRQ(ierr);

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3563 
3564 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3565 
/*
   Extracts the parallel submatrix mat[isrow,iscol] when the row distribution of the
   result matches that of mat.  On MAT_INITIAL_MATRIX the sequential pieces
   (Msub, iscol_sub, iscmap) are composed onto *newmat so that a later
   MAT_REUSE_MATRIX call can recover them with PetscObjectQuery() and only
   refresh the numerical values.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* Refresh the numerical values of the sequential submatrix in place */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* Keep only the requested columns this process actually stores: the diagonal-block
         columns [cstart,cend) plus off-diagonal columns present in garray (merge walk
         over the sorted garray, hence the running index k) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* Prefix sum of the local column counts gives this rank's column range [rstart,rend) */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  /* Copy Msub row by row, mapping its local column indices to global ones via cmap */
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* Compose then destroy: the composed reference keeps each object alive on *newmat */
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3774 
3775 /*
3776     Not great since it makes two copies of the submatrix, first an SeqAIJ
3777   in local and then by concatenating the local matrices the end result.
3778   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3779 
3780   Note: This requires a sequential iscol with all indices.
3781 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call ==  MAT_REUSE_MATRIX) {
    /* Recover the sequential submatrix composed onto *newmat by a previous
       MAT_INITIAL_MATRIX call and refresh its values in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* Prefix sum of the local column counts gives this rank's column range [rstart,rend) */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* Copy Mreuse row by row into the parallel matrix; jj/aa walk the CSR arrays */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* Compose then destroy: the composed reference keeps Mreuse alive on M */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3903 
3904 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3905 {
3906   PetscInt       m,cstart, cend,j,nnz,i,d;
3907   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3908   const PetscInt *JJ;
3909   PetscScalar    *values;
3910   PetscErrorCode ierr;
3911   PetscBool      nooffprocentries;
3912 
3913   PetscFunctionBegin;
3914   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3915 
3916   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3917   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3918   m      = B->rmap->n;
3919   cstart = B->cmap->rstart;
3920   cend   = B->cmap->rend;
3921   rstart = B->rmap->rstart;
3922 
3923   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3924 
3925 #if defined(PETSC_USE_DEBUG)
3926   for (i=0; i<m; i++) {
3927     nnz = Ii[i+1]- Ii[i];
3928     JJ  = J + Ii[i];
3929     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3930     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3931     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3932   }
3933 #endif
3934 
3935   for (i=0; i<m; i++) {
3936     nnz     = Ii[i+1]- Ii[i];
3937     JJ      = J + Ii[i];
3938     nnz_max = PetscMax(nnz_max,nnz);
3939     d       = 0;
3940     for (j=0; j<nnz; j++) {
3941       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3942     }
3943     d_nnz[i] = d;
3944     o_nnz[i] = nnz - d;
3945   }
3946   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3947   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3948 
3949   if (v) values = (PetscScalar*)v;
3950   else {
3951     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3952   }
3953 
3954   for (i=0; i<m; i++) {
3955     ii   = i + rstart;
3956     nnz  = Ii[i+1]- Ii[i];
3957     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3958   }
3959   nooffprocentries    = B->nooffprocentries;
3960   B->nooffprocentries = PETSC_TRUE;
3961   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3962   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   B->nooffprocentries = nooffprocentries;
3964 
3965   if (!v) {
3966     ierr = PetscFree(values);CHKERRQ(ierr);
3967   }
3968   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3969   PetscFunctionReturn(0);
3970 }
3971 
3972 /*@
3973    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3974    (the default parallel PETSc format).
3975 
3976    Collective
3977 
3978    Input Parameters:
3979 +  B - the matrix
3980 .  i - the indices into j for the start of each local row (starts with zero)
3981 .  j - the column indices for each local row (starts with zero)
3982 -  v - optional values in the matrix
3983 
3984    Level: developer
3985 
3986    Notes:
3987        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3988      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3989      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3990 
3991        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3992 
3993        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3995     as shown
3996 
3997 $        1 0 0
3998 $        2 0 3     P0
3999 $       -------
4000 $        4 5 6     P1
4001 $
4002 $     Process0 [P0]: rows_owned=[0,1]
4003 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4004 $        j =  {0,0,2}  [size = 3]
4005 $        v =  {1,2,3}  [size = 3]
4006 $
4007 $     Process1 [P1]: rows_owned=[2]
4008 $        i =  {0,3}    [size = nrow+1  = 1+1]
4009 $        j =  {0,1,2}  [size = 3]
4010 $        v =  {4,5,6}  [size = 3]
4011 
4012 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4013           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4014 @*/
4015 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4016 {
4017   PetscErrorCode ierr;
4018 
4019   PetscFunctionBegin;
4020   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4021   PetscFunctionReturn(0);
4022 }
4023 
4024 /*@C
4025    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4026    (the default parallel PETSc format).  For good matrix assembly performance
4027    the user should preallocate the matrix storage by setting the parameters
4028    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4029    performance can be increased by more than a factor of 50.
4030 
4031    Collective
4032 
4033    Input Parameters:
4034 +  B - the matrix
4035 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4036            (same value is used for all local rows)
4037 .  d_nnz - array containing the number of nonzeros in the various rows of the
4038            DIAGONAL portion of the local submatrix (possibly different for each row)
4039            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4040            The size of this array is equal to the number of local rows, i.e 'm'.
4041            For matrices that will be factored, you must leave room for (and set)
4042            the diagonal entry even if it is zero.
4043 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4044            submatrix (same value is used for all local rows).
4045 -  o_nnz - array containing the number of nonzeros in the various rows of the
4046            OFF-DIAGONAL portion of the local submatrix (possibly different for
4047            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4048            structure. The size of this array is equal to the number
4049            of local rows, i.e 'm'.
4050 
4051    If the *_nnz parameter is given then the *_nz parameter is ignored
4052 
4053    The AIJ format (also called the Yale sparse matrix format or
4054    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4055    storage.  The stored row and column indices begin with zero.
4056    See Users-Manual: ch_mat for details.
4057 
4058    The parallel matrix is partitioned such that the first m0 rows belong to
4059    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4060    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4061 
4062    The DIAGONAL portion of the local submatrix of a processor can be defined
4063    as the submatrix which is obtained by extraction the part corresponding to
4064    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4065    first row that belongs to the processor, r2 is the last row belonging to
4066    the this processor, and c1-c2 is range of indices of the local part of a
4067    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4068    common case of a square matrix, the row and column ranges are the same and
4069    the DIAGONAL part is also square. The remaining portion of the local
4070    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4071 
4072    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4073 
4074    You can call MatGetInfo() to get information on how effective the preallocation was;
4075    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4076    You can also run with the option -info and look for messages with the string
4077    malloc in them to see if additional memory allocation was needed.
4078 
4079    Example usage:
4080 
4081    Consider the following 8x8 matrix with 34 non-zero values, that is
4082    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4083    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4084    as follows:
4085 
4086 .vb
4087             1  2  0  |  0  3  0  |  0  4
4088     Proc0   0  5  6  |  7  0  0  |  8  0
4089             9  0 10  | 11  0  0  | 12  0
4090     -------------------------------------
4091            13  0 14  | 15 16 17  |  0  0
4092     Proc1   0 18  0  | 19 20 21  |  0  0
4093             0  0  0  | 22 23  0  | 24  0
4094     -------------------------------------
4095     Proc2  25 26 27  |  0  0 28  | 29  0
4096            30  0  0  | 31 32 33  |  0 34
4097 .ve
4098 
4099    This can be represented as a collection of submatrices as:
4100 
4101 .vb
4102       A B C
4103       D E F
4104       G H I
4105 .ve
4106 
4107    Where the submatrices A,B,C are owned by proc0, D,E,F are
4108    owned by proc1, G,H,I are owned by proc2.
4109 
4110    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4111    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4112    The 'M','N' parameters are 8,8, and have the same values on all procs.
4113 
4114    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4115    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4116    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4117    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4118    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4119    matrix, ans [DF] as another SeqAIJ matrix.
4120 
4121    When d_nz, o_nz parameters are specified, d_nz storage elements are
4122    allocated for every row of the local diagonal submatrix, and o_nz
4123    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4126    In this case, the values of d_nz,o_nz are:
4127 .vb
4128      proc0 : dnz = 2, o_nz = 2
4129      proc1 : dnz = 3, o_nz = 2
4130      proc2 : dnz = 1, o_nz = 4
4131 .ve
4132    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4133    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4135    34 values.
4136 
4137    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4139    In the above case the values for d_nnz,o_nnz are:
4140 .vb
4141      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4142      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4143      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4144 .ve
4145    Here the space allocated is sum of all the above values i.e 34, and
4146    hence pre-allocation is perfect.
4147 
4148    Level: intermediate
4149 
4150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4151           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4152 @*/
4153 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4154 {
4155   PetscErrorCode ierr;
4156 
4157   PetscFunctionBegin;
4158   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4159   PetscValidType(B,1);
4160   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 /*@
4165      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4166          CSR format for the local rows.
4167 
4168    Collective
4169 
4170    Input Parameters:
4171 +  comm - MPI communicator
4172 .  m - number of local rows (Cannot be PETSC_DECIDE)
4173 .  n - This value should be the same as the local size used in creating the
4174        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4175        calculated if N is given) For square matrices n is almost always m.
4176 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4177 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4178 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4179 .   j - column indices
4180 -   a - matrix values
4181 
4182    Output Parameter:
4183 .   mat - the matrix
4184 
4185    Level: intermediate
4186 
4187    Notes:
4188        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4189      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4190      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4191 
4192        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4193 
4194        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4196     as shown
4197 
4198        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4199 
4200 $        1 0 0
4201 $        2 0 3     P0
4202 $       -------
4203 $        4 5 6     P1
4204 $
4205 $     Process0 [P0]: rows_owned=[0,1]
4206 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4207 $        j =  {0,0,2}  [size = 3]
4208 $        v =  {1,2,3}  [size = 3]
4209 $
4210 $     Process1 [P1]: rows_owned=[2]
4211 $        i =  {0,3}    [size = nrow+1  = 1+1]
4212 $        j =  {0,1,2}  [size = 3]
4213 $        v =  {4,5,6}  [size = 3]
4214 
4215 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4216           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4217 @*/
4218 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4219 {
4220   PetscErrorCode ierr;
4221 
4222   PetscFunctionBegin;
4223   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4224   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4225   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4226   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4227   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4228   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4229   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 /*@
4234      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4235          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4236 
4237    Collective
4238 
4239    Input Parameters:
4240 +  mat - the matrix
4241 .  m - number of local rows (Cannot be PETSC_DECIDE)
4242 .  n - This value should be the same as the local size used in creating the
4243        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4244        calculated if N is given) For square matrices n is almost always m.
4245 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4246 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4247 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4248 .  J - column indices
4249 -  v - matrix values
4250 
4251    Level: intermediate
4252 
4253 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4254           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4255 @*/
4256 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4257 {
4258   PetscErrorCode ierr;
4259   PetscInt       cstart, cend,nnz,i,j;
4260   PetscInt       *ld;
4261   PetscBool      nooffprocentries;
4262   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4263   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4264   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4265   const PetscInt *Adi = Ad->i;
4266   PetscInt       ldi,Iii,md;
4267 
4268   PetscFunctionBegin;
4269   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4270   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4271   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4272   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4273 
4274   cstart = mat->cmap->rstart;
4275   cend   = mat->cmap->rend;
4276   if (!Aij->ld) {
4277     /* count number of entries below block diagonal */
4278     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4279     Aij->ld = ld;
4280     for (i=0; i<m; i++) {
4281       nnz  = Ii[i+1]- Ii[i];
4282       j     = 0;
4283       while  (J[j] < cstart && j < nnz) {j++;}
4284       J    += nnz;
4285       ld[i] = j;
4286     }
4287   } else {
4288     ld = Aij->ld;
4289   }
4290 
4291   for (i=0; i<m; i++) {
4292     nnz  = Ii[i+1]- Ii[i];
4293     Iii  = Ii[i];
4294     ldi  = ld[i];
4295     md   = Adi[i+1]-Adi[i];
4296     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4297     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4298     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4299     ad  += md;
4300     ao  += nnz - md;
4301   }
4302   nooffprocentries      = mat->nooffprocentries;
4303   mat->nooffprocentries = PETSC_TRUE;
4304   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4305   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4306   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4307   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4308   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4309   mat->nooffprocentries = nooffprocentries;
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 /*@C
4314    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4315    (the default parallel PETSc format).  For good matrix assembly performance
4316    the user should preallocate the matrix storage by setting the parameters
4317    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4318    performance can be increased by more than a factor of 50.
4319 
4320    Collective
4321 
4322    Input Parameters:
4323 +  comm - MPI communicator
4324 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4325            This value should be the same as the local size used in creating the
4326            y vector for the matrix-vector product y = Ax.
4327 .  n - This value should be the same as the local size used in creating the
4328        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4329        calculated if N is given) For square matrices n is almost always m.
4330 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4331 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4332 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4333            (same value is used for all local rows)
4334 .  d_nnz - array containing the number of nonzeros in the various rows of the
4335            DIAGONAL portion of the local submatrix (possibly different for each row)
4336            or NULL, if d_nz is used to specify the nonzero structure.
4337            The size of this array is equal to the number of local rows, i.e 'm'.
4338 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4339            submatrix (same value is used for all local rows).
4340 -  o_nnz - array containing the number of nonzeros in the various rows of the
4341            OFF-DIAGONAL portion of the local submatrix (possibly different for
4342            each row) or NULL, if o_nz is used to specify the nonzero
4343            structure. The size of this array is equal to the number
4344            of local rows, i.e 'm'.
4345 
4346    Output Parameter:
4347 .  A - the matrix
4348 
4349    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4350    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4351    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4352 
4353    Notes:
4354    If the *_nnz parameter is given then the *_nz parameter is ignored
4355 
4356    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4357    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4358    storage requirements for this matrix.
4359 
4360    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4361    processor than it must be used on all processors that share the object for
4362    that argument.
4363 
4364    The user MUST specify either the local or global matrix dimensions
4365    (possibly both).
4366 
4367    The parallel matrix is partitioned across processors such that the
4368    first m0 rows belong to process 0, the next m1 rows belong to
4369    process 1, the next m2 rows belong to process 2 etc.. where
4370    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4371    values corresponding to [m x N] submatrix.
4372 
4373    The columns are logically partitioned with the n0 columns belonging
4374    to 0th partition, the next n1 columns belonging to the next
4375    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4376 
4377    The DIAGONAL portion of the local submatrix on any given processor
4378    is the submatrix corresponding to the rows and columns m,n
4379    corresponding to the given processor. i.e diagonal matrix on
4380    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4381    etc. The remaining portion of the local submatrix [m x (N-n)]
4382    constitute the OFF-DIAGONAL portion. The example below better
4383    illustrates this concept.
4384 
4385    For a square global matrix we define each processor's diagonal portion
4386    to be its local rows and the corresponding columns (a square submatrix);
4387    each processor's off-diagonal portion encompasses the remainder of the
4388    local matrix (a rectangular submatrix).
4389 
4390    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4391 
4392    When calling this routine with a single process communicator, a matrix of
4393    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4394    type of communicator, use the construction mechanism
4395 .vb
4396      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4397 .ve
4398 
4399 $     MatCreate(...,&A);
4400 $     MatSetType(A,MATMPIAIJ);
4401 $     MatSetSizes(A, m,n,M,N);
4402 $     MatMPIAIJSetPreallocation(A,...);
4403 
4404    By default, this format uses inodes (identical nodes) when possible.
4405    We search for consecutive rows with the same nonzero structure, thereby
4406    reusing matrix information to achieve increased efficiency.
4407 
4408    Options Database Keys:
4409 +  -mat_no_inode  - Do not use inodes
4410 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4411 
4412 
4413 
4414    Example usage:
4415 
4416    Consider the following 8x8 matrix with 34 non-zero values, that is
4417    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4418    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4419    as follows
4420 
4421 .vb
4422             1  2  0  |  0  3  0  |  0  4
4423     Proc0   0  5  6  |  7  0  0  |  8  0
4424             9  0 10  | 11  0  0  | 12  0
4425     -------------------------------------
4426            13  0 14  | 15 16 17  |  0  0
4427     Proc1   0 18  0  | 19 20 21  |  0  0
4428             0  0  0  | 22 23  0  | 24  0
4429     -------------------------------------
4430     Proc2  25 26 27  |  0  0 28  | 29  0
4431            30  0  0  | 31 32 33  |  0 34
4432 .ve
4433 
4434    This can be represented as a collection of submatrices as
4435 
4436 .vb
4437       A B C
4438       D E F
4439       G H I
4440 .ve
4441 
4442    Where the submatrices A,B,C are owned by proc0, D,E,F are
4443    owned by proc1, G,H,I are owned by proc2.
4444 
4445    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4446    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4447    The 'M','N' parameters are 8,8, and have the same values on all procs.
4448 
4449    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4450    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4451    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4452    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4453    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4455 
4456    When d_nz, o_nz parameters are specified, d_nz storage elements are
4457    allocated for every row of the local diagonal submatrix, and o_nz
4458    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4460    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4461    In this case, the values of d_nz,o_nz are
4462 .vb
4463      proc0 : dnz = 2, o_nz = 2
4464      proc1 : dnz = 3, o_nz = 2
4465      proc2 : dnz = 1, o_nz = 4
4466 .ve
4467    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4468    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4470    34 values.
4471 
4472    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4474    In the above case the values for d_nnz,o_nnz are
4475 .vb
4476      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4477      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4478      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4479 .ve
4480    Here the space allocated is sum of all the above values i.e 34, and
4481    hence pre-allocation is perfect.
4482 
4483    Level: intermediate
4484 
4485 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4486           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4487 @*/
4488 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4489 {
4490   PetscErrorCode ierr;
4491   PetscMPIInt    size;
4492 
4493   PetscFunctionBegin;
4494   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4495   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4496   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4497   if (size > 1) {
4498     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4499     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4500   } else {
4501     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4502     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4503   }
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4508 {
4509   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4510   PetscBool      flg;
4511   PetscErrorCode ierr;
4512 
4513   PetscFunctionBegin;
4514   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4515   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4516   if (Ad)     *Ad     = a->A;
4517   if (Ao)     *Ao     = a->B;
4518   if (colmap) *colmap = a->garray;
4519   PetscFunctionReturn(0);
4520 }
4521 
/* Stacks the per-process sequential matrices inmat on top of each other (by row)
   into one parallel AIJ matrix outmat on comm; n is each process's local column
   count (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the sparsity is analyzed and
   outmat created; with MAT_REUSE_MATRIX only the values are re-inserted. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  /* inmat is sequential, so m is its full row count and N its column count */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* Exclusive prefix sum of local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* MatPreallocateInitialize/Finalize allocate and free the dnz/onz work arrays */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* MATAIJ resolves to seq or mpi depending on comm size, so set both
       preallocations; only the matching one takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4573 
4574 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4575 {
4576   PetscErrorCode    ierr;
4577   PetscMPIInt       rank;
4578   PetscInt          m,N,i,rstart,nnz;
4579   size_t            len;
4580   const PetscInt    *indx;
4581   PetscViewer       out;
4582   char              *name;
4583   Mat               B;
4584   const PetscScalar *values;
4585 
4586   PetscFunctionBegin;
4587   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4588   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4589   /* Should this be the type of the diagonal block of A? */
4590   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4591   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4592   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4593   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4594   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4595   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4596   for (i=0; i<m; i++) {
4597     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4598     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4599     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4600   }
4601   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4603 
4604   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4605   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4606   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4607   sprintf(name,"%s.%d",outfile,rank);
4608   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4609   ierr = PetscFree(name);CHKERRQ(ierr);
4610   ierr = MatView(B,out);CHKERRQ(ierr);
4611   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4612   ierr = MatDestroy(&B);CHKERRQ(ierr);
4613   PetscFunctionReturn(0);
4614 }
4615 
/* Destructor for MPIAIJ matrices created by the SeqsToMPI merge path: frees the
   Mat_Merge_SeqsToMPI context attached to the matrix (if any), then falls
   through to the regular MPIAIJ destructor. */
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    /* buf_ri/buf_rj are arrays of pointers whose payload was allocated as one
       chunk anchored at element 0, so free [0] then the pointer array */
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* Detach the (now dangling) container from the matrix */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  /* Delegate the rest of the teardown to the standard MPIAIJ destructor */
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4645 
4646 #include <../src/mat/utils/freespace.h>
4647 #include <petscbt.h>
4648 
/* Numeric phase of merging per-process SeqAIJ matrices (each holding rows for
   all processes) into the parallel matrix mpimat whose structure was built by
   MatCreateMPIAIJSumSeqAIJSymbolic(); each process sends the values of the rows
   owned by others and sums received values with its own local contribution. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Retrieve the merge context stored by the symbolic phase.
     NOTE(review): container is not checked for NULL here -- the symbolic phase
     must have run on this mpimat; verify callers guarantee that. */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  /* Each message is the contiguous slice of seqmat's values for rows owned by proc */
  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  /* Set up walking cursors into each received i-structure message */
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* bj_i is a superset of aj (both sorted), so scan bj_i once and add
       matching entries; terminates when all anzi values are consumed */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r's payload was allocated as one chunk anchored at element 0 */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4765 
4766 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4767 {
4768   PetscErrorCode      ierr;
4769   Mat                 B_mpi;
4770   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4771   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4772   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4773   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4774   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4775   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4776   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4777   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4778   MPI_Status          *status;
4779   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4780   PetscBT             lnkbt;
4781   Mat_Merge_SeqsToMPI *merge;
4782   PetscContainer      container;
4783 
4784   PetscFunctionBegin;
4785   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4786 
4787   /* make sure it is a PETSc comm */
4788   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4789   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4790   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4791 
4792   ierr = PetscNew(&merge);CHKERRQ(ierr);
4793   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4794 
4795   /* determine row ownership */
4796   /*---------------------------------------------------------*/
4797   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4798   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4799   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4800   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4801   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4802   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4803   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4804 
4805   m      = merge->rowmap->n;
4806   owners = merge->rowmap->range;
4807 
4808   /* determine the number of messages to send, their lengths */
4809   /*---------------------------------------------------------*/
4810   len_s = merge->len_s;
4811 
4812   len          = 0; /* length of buf_si[] */
4813   merge->nsend = 0;
4814   for (proc=0; proc<size; proc++) {
4815     len_si[proc] = 0;
4816     if (proc == rank) {
4817       len_s[proc] = 0;
4818     } else {
4819       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4820       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4821     }
4822     if (len_s[proc]) {
4823       merge->nsend++;
4824       nrows = 0;
4825       for (i=owners[proc]; i<owners[proc+1]; i++) {
4826         if (ai[i+1] > ai[i]) nrows++;
4827       }
4828       len_si[proc] = 2*(nrows+1);
4829       len         += len_si[proc];
4830     }
4831   }
4832 
4833   /* determine the number and length of messages to receive for ij-structure */
4834   /*-------------------------------------------------------------------------*/
4835   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4836   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4837 
4838   /* post the Irecv of j-structure */
4839   /*-------------------------------*/
4840   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4841   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4842 
4843   /* post the Isend of j-structure */
4844   /*--------------------------------*/
4845   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4846 
4847   for (proc=0, k=0; proc<size; proc++) {
4848     if (!len_s[proc]) continue;
4849     i    = owners[proc];
4850     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4851     k++;
4852   }
4853 
4854   /* receives and sends of j-structure are complete */
4855   /*------------------------------------------------*/
4856   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4857   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4858 
4859   /* send and recv i-structure */
4860   /*---------------------------*/
4861   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4862   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4863 
4864   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4865   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4866   for (proc=0,k=0; proc<size; proc++) {
4867     if (!len_s[proc]) continue;
4868     /* form outgoing message for i-structure:
4869          buf_si[0]:                 nrows to be sent
4870                [1:nrows]:           row index (global)
4871                [nrows+1:2*nrows+1]: i-structure index
4872     */
4873     /*-------------------------------------------*/
4874     nrows       = len_si[proc]/2 - 1;
4875     buf_si_i    = buf_si + nrows+1;
4876     buf_si[0]   = nrows;
4877     buf_si_i[0] = 0;
4878     nrows       = 0;
4879     for (i=owners[proc]; i<owners[proc+1]; i++) {
4880       anzi = ai[i+1] - ai[i];
4881       if (anzi) {
4882         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4883         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4884         nrows++;
4885       }
4886     }
4887     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4888     k++;
4889     buf_si += len_si[proc];
4890   }
4891 
4892   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4893   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4894 
4895   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4896   for (i=0; i<merge->nrecv; i++) {
4897     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4898   }
4899 
4900   ierr = PetscFree(len_si);CHKERRQ(ierr);
4901   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4902   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4903   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4904   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4905   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4906   ierr = PetscFree(status);CHKERRQ(ierr);
4907 
4908   /* compute a local seq matrix in each processor */
4909   /*----------------------------------------------*/
4910   /* allocate bi array and free space for accumulating nonzero column info */
4911   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4912   bi[0] = 0;
4913 
4914   /* create and initialize a linked list */
4915   nlnk = N+1;
4916   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4917 
4918   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4919   len  = ai[owners[rank+1]] - ai[owners[rank]];
4920   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4921 
4922   current_space = free_space;
4923 
4924   /* determine symbolic info for each local row */
4925   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4926 
4927   for (k=0; k<merge->nrecv; k++) {
4928     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4929     nrows       = *buf_ri_k[k];
4930     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4931     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4932   }
4933 
4934   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4935   len  = 0;
4936   for (i=0; i<m; i++) {
4937     bnzi = 0;
4938     /* add local non-zero cols of this proc's seqmat into lnk */
4939     arow  = owners[rank] + i;
4940     anzi  = ai[arow+1] - ai[arow];
4941     aj    = a->j + ai[arow];
4942     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4943     bnzi += nlnk;
4944     /* add received col data into lnk */
4945     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4946       if (i == *nextrow[k]) { /* i-th row */
4947         anzi  = *(nextai[k]+1) - *nextai[k];
4948         aj    = buf_rj[k] + *nextai[k];
4949         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4950         bnzi += nlnk;
4951         nextrow[k]++; nextai[k]++;
4952       }
4953     }
4954     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4955 
4956     /* if free space is not available, make more free space */
4957     if (current_space->local_remaining<bnzi) {
4958       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4959       nspacedouble++;
4960     }
4961     /* copy data into free space, then initialize lnk */
4962     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4963     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4964 
4965     current_space->array           += bnzi;
4966     current_space->local_used      += bnzi;
4967     current_space->local_remaining -= bnzi;
4968 
4969     bi[i+1] = bi[i] + bnzi;
4970   }
4971 
4972   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4973 
4974   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4975   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4976   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4977 
4978   /* create symbolic parallel matrix B_mpi */
4979   /*---------------------------------------*/
4980   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4981   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4982   if (n==PETSC_DECIDE) {
4983     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4984   } else {
4985     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4986   }
4987   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4988   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4989   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4990   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4991   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4992 
4993   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4994   B_mpi->assembled    = PETSC_FALSE;
4995   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4996   merge->bi           = bi;
4997   merge->bj           = bj;
4998   merge->buf_ri       = buf_ri;
4999   merge->buf_rj       = buf_rj;
5000   merge->coi          = NULL;
5001   merge->coj          = NULL;
5002   merge->owners_co    = NULL;
5003 
5004   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5005 
5006   /* attach the supporting struct to B_mpi for reuse */
5007   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5008   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5009   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5010   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5011   *mpimat = B_mpi;
5012 
5013   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 /*@C
5018       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5019                  matrices from each processor
5020 
5021     Collective
5022 
5023    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
5026 .    m - number of local rows (or PETSC_DECIDE)
5027 .    n - number of local columns (or PETSC_DECIDE)
5028 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 
5030    Output Parameter:
5031 .    mpimat - the parallel matrix generated
5032 
5033     Level: advanced
5034 
5035    Notes:
5036      The dimensions of the sequential matrix in each processor MUST be the same.
5037      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5038      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5039 @*/
5040 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5041 {
5042   PetscErrorCode ierr;
5043   PetscMPIInt    size;
5044 
5045   PetscFunctionBegin;
5046   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5047   if (size == 1) {
5048     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5049     if (scall == MAT_INITIAL_MATRIX) {
5050       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5051     } else {
5052       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5053     }
5054     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5055     PetscFunctionReturn(0);
5056   }
5057   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5058   if (scall == MAT_INITIAL_MATRIX) {
5059     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5060   }
5061   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5062   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5063   PetscFunctionReturn(0);
5064 }
5065 
5066 /*@
5067      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5068           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5069           with MatGetSize()
5070 
5071     Not Collective
5072 
5073    Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5076 
5077    Output Parameter:
5078 .    A_loc - the local sequential matrix generated
5079 
5080     Level: developer
5081 
5082 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5083 
5084 @*/
5085 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5086 {
5087   PetscErrorCode ierr;
5088   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5089   Mat_SeqAIJ     *mat,*a,*b;
5090   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5091   MatScalar      *aa,*ba,*cam;
5092   PetscScalar    *ca;
5093   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5094   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5095   PetscBool      match;
5096   MPI_Comm       comm;
5097   PetscMPIInt    size;
5098 
5099   PetscFunctionBegin;
5100   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5101   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5102   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5103   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5104   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5105 
5106   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5107   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5108   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5109   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5110   aa = a->a; ba = b->a;
5111   if (scall == MAT_INITIAL_MATRIX) {
5112     if (size == 1) {
5113       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5114       PetscFunctionReturn(0);
5115     }
5116 
5117     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5118     ci[0] = 0;
5119     for (i=0; i<am; i++) {
5120       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5121     }
5122     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5123     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5124     k    = 0;
5125     for (i=0; i<am; i++) {
5126       ncols_o = bi[i+1] - bi[i];
5127       ncols_d = ai[i+1] - ai[i];
5128       /* off-diagonal portion of A */
5129       for (jo=0; jo<ncols_o; jo++) {
5130         col = cmap[*bj];
5131         if (col >= cstart) break;
5132         cj[k]   = col; bj++;
5133         ca[k++] = *ba++;
5134       }
5135       /* diagonal portion of A */
5136       for (j=0; j<ncols_d; j++) {
5137         cj[k]   = cstart + *aj++;
5138         ca[k++] = *aa++;
5139       }
5140       /* off-diagonal portion of A */
5141       for (j=jo; j<ncols_o; j++) {
5142         cj[k]   = cmap[*bj++];
5143         ca[k++] = *ba++;
5144       }
5145     }
5146     /* put together the new matrix */
5147     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5148     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5149     /* Since these are PETSc arrays, change flags to free them as necessary. */
5150     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5151     mat->free_a  = PETSC_TRUE;
5152     mat->free_ij = PETSC_TRUE;
5153     mat->nonew   = 0;
5154   } else if (scall == MAT_REUSE_MATRIX) {
5155     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5156     ci = mat->i; cj = mat->j; cam = mat->a;
5157     for (i=0; i<am; i++) {
5158       /* off-diagonal portion of A */
5159       ncols_o = bi[i+1] - bi[i];
5160       for (jo=0; jo<ncols_o; jo++) {
5161         col = cmap[*bj];
5162         if (col >= cstart) break;
5163         *cam++ = *ba++; bj++;
5164       }
5165       /* diagonal portion of A */
5166       ncols_d = ai[i+1] - ai[i];
5167       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5168       /* off-diagonal portion of A */
5169       for (j=jo; j<ncols_o; j++) {
5170         *cam++ = *ba++; bj++;
5171       }
5172     }
5173   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5174   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5175   PetscFunctionReturn(0);
5176 }
5177 
5178 /*@C
5179      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5180 
5181     Not Collective
5182 
5183    Input Parameters:
5184 +    A - the matrix
5185 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5186 -    row, col - index sets of rows and columns to extract (or NULL)
5187 
5188    Output Parameter:
5189 .    A_loc - the local sequential matrix generated
5190 
5191     Level: developer
5192 
5193 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5194 
5195 @*/
5196 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5197 {
5198   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5199   PetscErrorCode ierr;
5200   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5201   IS             isrowa,iscola;
5202   Mat            *aloc;
5203   PetscBool      match;
5204 
5205   PetscFunctionBegin;
5206   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5207   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5208   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5209   if (!row) {
5210     start = A->rmap->rstart; end = A->rmap->rend;
5211     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5212   } else {
5213     isrowa = *row;
5214   }
5215   if (!col) {
5216     start = A->cmap->rstart;
5217     cmap  = a->garray;
5218     nzA   = a->A->cmap->n;
5219     nzB   = a->B->cmap->n;
5220     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5221     ncols = 0;
5222     for (i=0; i<nzB; i++) {
5223       if (cmap[i] < start) idx[ncols++] = cmap[i];
5224       else break;
5225     }
5226     imark = i;
5227     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5228     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5229     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5230   } else {
5231     iscola = *col;
5232   }
5233   if (scall != MAT_INITIAL_MATRIX) {
5234     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5235     aloc[0] = *A_loc;
5236   }
5237   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5238   if (!col) { /* attach global id of condensed columns */
5239     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5240   }
5241   *A_loc = aloc[0];
5242   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5243   if (!row) {
5244     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5245   }
5246   if (!col) {
5247     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5248   }
5249   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5250   PetscFunctionReturn(0);
5251 }
5252 
5253 /*@C
5254     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5255 
5256     Collective on Mat
5257 
5258    Input Parameters:
5259 +    A,B - the matrices in mpiaij format
5260 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5261 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5262 
5263    Output Parameter:
5264 +    rowb, colb - index sets of rows and columns of B to extract
5265 -    B_seq - the sequential matrix generated
5266 
5267     Level: developer
5268 
5269 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A's column ownership ranges must match B's row ownership ranges for A*B to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of global rows of B to extract = the nonzero columns of A.
       garray[] holds the global indices of A's off-diagonal columns (the loop below
       assumes they are sorted); local (diagonal-block) columns form the contiguous
       range [start, start+nzA). */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    /* take every column of B */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() with MAT_REUSE_MATRIX expects the matrix wrapped in an array */
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller (for later MAT_REUSE calls) or clean them up */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5322 
5323 /*
5324     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5325     of the OFF-DIAGONAL portion of local A
5326 
5327     Collective on Mat
5328 
5329    Input Parameters:
5330 +    A,B - the matrices in mpiaij format
5331 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5332 
5333    Output Parameter:
5334 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5335 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5336 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5337 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5338 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5341 
5342     Level: developer
5343 
5344 */
5345 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5346 {
5347   PetscErrorCode         ierr;
5348   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5349   Mat_SeqAIJ             *b_oth;
5350   VecScatter             ctx;
5351   MPI_Comm               comm;
5352   const PetscMPIInt      *rprocs,*sprocs;
5353   const PetscInt         *srow,*rstarts,*sstarts;
5354   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5355   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5356   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5357   MPI_Request            *rwaits = NULL,*swaits = NULL;
5358   MPI_Status             rstatus;
5359   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5360 
5361   PetscFunctionBegin;
5362   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5363   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5364 
5365   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5366     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5367   }
5368   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5369   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5370 
5371   if (size == 1) {
5372     startsj_s = NULL;
5373     bufa_ptr  = NULL;
5374     *B_oth    = NULL;
5375     PetscFunctionReturn(0);
5376   }
5377 
5378   ctx = a->Mvctx;
5379   tag = ((PetscObject)ctx)->tag;
5380 
5381   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5382   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5383   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5384   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5385   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5386   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5387   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5388 
5389   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5390   if (scall == MAT_INITIAL_MATRIX) {
5391     /* i-array */
5392     /*---------*/
5393     /*  post receives */
5394     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5395     for (i=0; i<nrecvs; i++) {
5396       rowlen = rvalues + rstarts[i]*rbs;
5397       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5398       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5399     }
5400 
5401     /* pack the outgoing message */
5402     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5403 
5404     sstartsj[0] = 0;
5405     rstartsj[0] = 0;
5406     len         = 0; /* total length of j or a array to be sent */
5407     if (nsends) {
5408       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5409       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5410     }
5411     for (i=0; i<nsends; i++) {
5412       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5413       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5414       for (j=0; j<nrows; j++) {
5415         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5416         for (l=0; l<sbs; l++) {
5417           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5418 
5419           rowlen[j*sbs+l] = ncols;
5420 
5421           len += ncols;
5422           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5423         }
5424         k++;
5425       }
5426       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5427 
5428       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5429     }
5430     /* recvs and sends of i-array are completed */
5431     i = nrecvs;
5432     while (i--) {
5433       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5434     }
5435     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5436     ierr = PetscFree(svalues);CHKERRQ(ierr);
5437 
5438     /* allocate buffers for sending j and a arrays */
5439     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5440     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5441 
5442     /* create i-array of B_oth */
5443     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5444 
5445     b_othi[0] = 0;
5446     len       = 0; /* total length of j or a array to be received */
5447     k         = 0;
5448     for (i=0; i<nrecvs; i++) {
5449       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5450       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5451       for (j=0; j<nrows; j++) {
5452         b_othi[k+1] = b_othi[k] + rowlen[j];
5453         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5454         k++;
5455       }
5456       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5457     }
5458     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5459 
5460     /* allocate space for j and a arrrays of B_oth */
5461     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5462     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5463 
5464     /* j-array */
5465     /*---------*/
5466     /*  post receives of j-array */
5467     for (i=0; i<nrecvs; i++) {
5468       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5469       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5470     }
5471 
5472     /* pack the outgoing message j-array */
5473     if (nsends) k = sstarts[0];
5474     for (i=0; i<nsends; i++) {
5475       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5476       bufJ  = bufj+sstartsj[i];
5477       for (j=0; j<nrows; j++) {
5478         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5479         for (ll=0; ll<sbs; ll++) {
5480           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5481           for (l=0; l<ncols; l++) {
5482             *bufJ++ = cols[l];
5483           }
5484           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5485         }
5486       }
5487       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5488     }
5489 
5490     /* recvs and sends of j-array are completed */
5491     i = nrecvs;
5492     while (i--) {
5493       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5494     }
5495     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5496   } else if (scall == MAT_REUSE_MATRIX) {
5497     sstartsj = *startsj_s;
5498     rstartsj = *startsj_r;
5499     bufa     = *bufa_ptr;
5500     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5501     b_otha   = b_oth->a;
5502   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5503 
5504   /* a-array */
5505   /*---------*/
5506   /*  post receives of a-array */
5507   for (i=0; i<nrecvs; i++) {
5508     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5509     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5510   }
5511 
5512   /* pack the outgoing message a-array */
5513   if (nsends) k = sstarts[0];
5514   for (i=0; i<nsends; i++) {
5515     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5516     bufA  = bufa+sstartsj[i];
5517     for (j=0; j<nrows; j++) {
5518       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5519       for (ll=0; ll<sbs; ll++) {
5520         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5521         for (l=0; l<ncols; l++) {
5522           *bufA++ = vals[l];
5523         }
5524         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5525       }
5526     }
5527     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5528   }
5529   /* recvs and sends of a-array are completed */
5530   i = nrecvs;
5531   while (i--) {
5532     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5533   }
5534   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5535   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5536 
5537   if (scall == MAT_INITIAL_MATRIX) {
5538     /* put together the new matrix */
5539     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5540 
5541     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5542     /* Since these are PETSc arrays, change flags to free them as necessary. */
5543     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5544     b_oth->free_a  = PETSC_TRUE;
5545     b_oth->free_ij = PETSC_TRUE;
5546     b_oth->nonew   = 0;
5547 
5548     ierr = PetscFree(bufj);CHKERRQ(ierr);
5549     if (!startsj_s || !bufa_ptr) {
5550       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5551       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5552     } else {
5553       *startsj_s = sstartsj;
5554       *startsj_r = rstartsj;
5555       *bufa_ptr  = bufa;
5556     }
5557   }
5558 
5559   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5560   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5561   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5562   PetscFunctionReturn(0);
5563 }
5564 
5565 /*@C
5566   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5567 
5568   Not Collective
5569 
5570   Input Parameters:
5571 . A - The matrix in mpiaij format
5572 
5573   Output Parameter:
5574 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5575 . colmap - A map from global column index to local index into lvec
5576 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5577 
5578   Level: developer
5579 
5580 @*/
5581 #if defined(PETSC_USE_CTABLE)
5582 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5583 #else
5584 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5585 #endif
5586 {
5587   Mat_MPIAIJ *a;
5588 
5589   PetscFunctionBegin;
5590   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5591   PetscValidPointer(lvec, 2);
5592   PetscValidPointer(colmap, 3);
5593   PetscValidPointer(multScatter, 4);
5594   a = (Mat_MPIAIJ*) A->data;
5595   if (lvec) *lvec = a->lvec;
5596   if (colmap) *colmap = a->colmap;
5597   if (multScatter) *multScatter = a->Mvctx;
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5602 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5603 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5604 #if defined(PETSC_HAVE_MKL_SPARSE)
5605 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5606 #endif
5607 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5608 #if defined(PETSC_HAVE_ELEMENTAL)
5609 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5610 #endif
5611 #if defined(PETSC_HAVE_HYPRE)
5612 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5613 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5614 #endif
5615 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5616 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5617 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5618 
5619 /*
5620     Computes (B'*A')' since computing B*A directly is untenable
5621 
5622                n                       p                          p
5623         (              )       (              )         (                  )
5624       m (      A       )  *  n (       B      )   =   m (         C        )
5625         (              )       (              )         (                  )
5626 
5627 */
5628 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5629 {
5630   PetscErrorCode ierr;
5631   Mat            At,Bt,Ct;
5632 
5633   PetscFunctionBegin;
5634   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5635   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5636   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5637   ierr = MatDestroy(&At);CHKERRQ(ierr);
5638   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5639   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5640   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5641   PetscFunctionReturn(0);
5642 }
5643 
5644 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5645 {
5646   PetscErrorCode ierr;
5647   PetscInt       m=A->rmap->n,n=B->cmap->n;
5648   Mat            Cmat;
5649 
5650   PetscFunctionBegin;
5651   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5652   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5653   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5654   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5655   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5656   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5657   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5659 
5660   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5661 
5662   *C = Cmat;
5663   PetscFunctionReturn(0);
5664 }
5665 
5666 /* ----------------------------------------------------------------*/
5667 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5668 {
5669   PetscErrorCode ierr;
5670 
5671   PetscFunctionBegin;
5672   if (scall == MAT_INITIAL_MATRIX) {
5673     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5674     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5675     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5676   }
5677   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5678   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5679   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5680   PetscFunctionReturn(0);
5681 }
5682 
5683 /*MC
5684    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5685 
5686    Options Database Keys:
5687 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5688 
5689   Level: beginner
5690 
5691 .seealso: MatCreateAIJ()
5692 M*/
5693 
5694 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5695 {
5696   Mat_MPIAIJ     *b;
5697   PetscErrorCode ierr;
5698   PetscMPIInt    size;
5699 
5700   PetscFunctionBegin;
5701   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5702 
5703   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5704   B->data       = (void*)b;
5705   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5706   B->assembled  = PETSC_FALSE;
5707   B->insertmode = NOT_SET_VALUES;
5708   b->size       = size;
5709 
5710   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5711 
5712   /* build cache for off array entries formed */
5713   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5714 
5715   b->donotstash  = PETSC_FALSE;
5716   b->colmap      = 0;
5717   b->garray      = 0;
5718   b->roworiented = PETSC_TRUE;
5719 
5720   /* stuff used for matrix vector multiply */
5721   b->lvec  = NULL;
5722   b->Mvctx = NULL;
5723 
5724   /* stuff for MatGetRow() */
5725   b->rowindices   = 0;
5726   b->rowvalues    = 0;
5727   b->getrowactive = PETSC_FALSE;
5728 
5729   /* flexible pointer used in CUSP/CUSPARSE classes */
5730   b->spptr = NULL;
5731 
5732   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5733   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5734   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5735   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5736   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5737   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5738   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5739   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5740   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5741   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5742 #if defined(PETSC_HAVE_MKL_SPARSE)
5743   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5744 #endif
5745   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5746   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5747 #if defined(PETSC_HAVE_ELEMENTAL)
5748   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5749 #endif
5750 #if defined(PETSC_HAVE_HYPRE)
5751   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5752 #endif
5753   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5754   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5755   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5756   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5757   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5758 #if defined(PETSC_HAVE_HYPRE)
5759   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5760 #endif
5761   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5762   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5763   PetscFunctionReturn(0);
5764 }
5765 
5766 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5768          and "off-diagonal" part of the matrix in CSR format.
5769 
5770    Collective
5771 
5772    Input Parameters:
5773 +  comm - MPI communicator
5774 .  m - number of local rows (Cannot be PETSC_DECIDE)
5775 .  n - This value should be the same as the local size used in creating the
5776        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5777        calculated if N is given) For square matrices n is almost always m.
5778 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5779 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5780 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5781 .   j - column indices
5782 .   a - matrix values
5783 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5784 .   oj - column indices
5785 -   oa - matrix values
5786 
5787    Output Parameter:
5788 .   mat - the matrix
5789 
5790    Level: advanced
5791 
5792    Notes:
5793        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5794        must free the arrays once the matrix has been destroyed and not before.
5795 
5796        The i and j indices are 0 based
5797 
5798        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5799 
5800        This sets local rows and cannot be used to set off-processor values.
5801 
5802        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5803        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5804        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5805        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5806        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5807        communication if it is known that only local entries will be set.
5808 
5809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5810           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5811 @*/
5812 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5813 {
5814   PetscErrorCode ierr;
5815   Mat_MPIAIJ     *maij;
5816 
5817   PetscFunctionBegin;
5818   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5819   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5820   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5821   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5822   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5823   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5824   maij = (Mat_MPIAIJ*) (*mat)->data;
5825 
5826   (*mat)->preallocated = PETSC_TRUE;
5827 
5828   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5829   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5830 
5831   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5832   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5833 
5834   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5835   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5836   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5837   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5838 
5839   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5840   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5841   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5842   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5843   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5844   PetscFunctionReturn(0);
5845 }
5846 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* The Fortran entry point below returns void, so the usual error macros (which
   execute "return ierr") cannot be used; redefine them to abort instead. */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

/* Map the C symbol to the name-mangling scheme of the Fortran compiler:
   all-caps, no trailing underscore, or (default) trailing underscore. */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on MPIAIJ matrices.

   All arguments arrive as pointers (Fortran pass-by-reference).  The body inlines the
   logic of MatSetValues_MPIAIJ: locally-owned rows are inserted directly into the
   diagonal (A) or off-diagonal (B) sequential blocks via the MatSetValues_SeqAIJ_*_Private
   macros; rows owned by other processes are stashed for communication at assembly time.
   Errors abort (see the macro redefinitions above) because this function returns void.
*/
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  /* mixing ADD_VALUES and INSERT_VALUES between assemblies is not allowed */
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    /* ownership ranges of this process: rows [rstart,rend), columns [cstart,cend) */
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    /* scratch variables consumed inside MatSetValues_SeqAIJ_A/B_Private (binary-search
       bounds, row pointers, etc.); do not remove even though they look unused here */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;              /* negative row index means "skip this row" */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up direct access into both sequential blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* pick the value according to the storage orientation of the input block */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;   /* negative column index means "skip" */
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            /* column in the off-diagonal block */
            if (mat->was_assembled) {
              /* after an assembly B uses compacted column indices; translate the
                 global column through the colmap */
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* new off-diagonal nonzero location: expand B back to global
                   column numbering so the entry can be inserted */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for assembly-time communication */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
5981