xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1678b73f9e0904d57331d9efef8bebee0da8ef53)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
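
/*
   Example (a minimal usage sketch of the MATAIJ type documented above; error checking,
   MatSetValues() calls, and assembly are omitted for brevity, and the per-row nonzero
   estimates 5 and 2 are illustrative only):

     Mat      A;
     PetscInt m = 10;                                  local number of rows and columns

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);                             or -mat_type aij via MatSetFromOptions()
     MatSeqAIJSetPreallocation(A,5,NULL);              takes effect on a one-process communicator
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);       takes effect on a multi-process communicator
     ...
     MatDestroy(&A);
*/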
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
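
/*
   Example (a sketch of selecting this format at runtime through the options database,
   per the key listed above; preallocation follows the MATAIJ example earlier in this file):

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetFromOptions(A);                             then run with -mat_type aijcrl
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/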
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the diagonal and off-diagonal nonzero counts */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the diagonal and off-diagonal nonzero counts */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each processor
350 has an order-N integer array) but access is fast.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
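
/*
   The colmap built above is consulted elsewhere in this file (see MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below) with the idiom sketched here, where gcol and lcol stand
   in for a global and a local column index; the +1/-1 shift is used because 0 marks
   "not present" in both the table and the array:

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol after the lookup means column gcol has no entry in the off-diagonal
   part B on this process.
*/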
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if the nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled, thus we can skip this step
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscSF        sf;
748   PetscInt      *lrows;
749   PetscSFNode   *rrows;
750   PetscInt       r, p = 0, len = 0;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Create SF where leaves are input rows and roots are owned rows */
755   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
756   for (r = 0; r < n; ++r) lrows[r] = -1;
757   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
758   for (r = 0; r < N; ++r) {
759     const PetscInt idx   = rows[r];
760     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
763     }
764     if (A->nooffproczerorows) {
765       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766       lrows[len++] = idx - owners[p];
767     } else {
768       rrows[r].rank = p;
769       rrows[r].index = rows[r] - owners[p];
770     }
771   }
772   if (!A->nooffproczerorows) {
773     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775     /* Collect flags for rows to be zeroed */
776     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
777     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
778     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779     /* Compress and put in row numbers */
780     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   }
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
797   } else if (diag != 0.0) {
798     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
799     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800     for (r = 0; r < len; ++r) {
801       const PetscInt row = lrows[r] + A->rmap->rstart;
802       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
803     }
804     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806   } else {
807     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   }
809   ierr = PetscFree(lrows);CHKERRQ(ierr);
810 
811   /* only change matrix nonzero state if pattern was allowed to be changed */
812   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMult_MPIAIJ"
936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937 {
938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       nt;
941 
942   PetscFunctionBegin;
943   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
944   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955 {
956   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatMultAdd_MPIAIJ"
966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
974   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscBool      merged;
986 
987   PetscFunctionBegin;
988   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
989   /* do nondiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   if (!merged) {
992     /* send it on its way */
993     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* receive remote parts: note this assumes the values are not actually */
997     /* added in yy until the next line, */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   } else {
1000     /* do local part */
1001     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* values actually were received in the Begin() but we need to call this nop */
1005     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1012 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscMPIInt    size;
1021 
1022   PetscFunctionBegin;
1023   /* Easy test: symmetric diagonal block */
1024   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1026   if (!*f) PetscFunctionReturn(0);
1027   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1032   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1034   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1035   for (i=0; i<first; i++) notme[i] = i;
1036   for (i=last; i<M; i++) notme[i-last+first] = i;
1037   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1038   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1039   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1040   Aoff = Aoffs[0];
1041   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1042   Boff = Boffs[0];
1043   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1044   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1045   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1046   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1047   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1048   ierr = PetscFree(notme);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   /* do nondiagonal part */
1061   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1062   /* send it on its way */
1063   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   /* do local part */
1065   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1066   /* receive remote parts */
1067   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*
1072   This only works correctly for square matrices where the subblock A->A is the
1073    diagonal block
1074 */
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078 {
1079   PetscErrorCode ierr;
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081 
1082   PetscFunctionBegin;
1083   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatScale_MPIAIJ"
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatDestroy_Redundant"
1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105 {
1106   PetscErrorCode ierr;
1107   Mat_Redundant  *redund = *redundant;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   *redundant = NULL;
1112   if (redund){
1113     if (redund->matseq) { /* via MatGetSubMatrices()  */
1114       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1115       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1116       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1117       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1118     } else {
1119       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1120       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1121       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1122       for (i=0; i<redund->nrecvs; i++) {
1123         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1124         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1125       }
1126       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1127     }
1128 
1129     if (redund->psubcomm) {
1130       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1131     }
1132     ierr = PetscFree(redund);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatDestroy_MPIAIJ"
1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145 #if defined(PETSC_USE_LOG)
1146   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147 #endif
1148   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1149   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1150   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1151   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1152   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1153 #if defined(PETSC_USE_CTABLE)
1154   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1155 #else
1156   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1157 #endif
1158   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1159   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1160   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1161   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1162   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1182   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1183   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1184   PetscErrorCode ierr;
1185   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1186   int            fd;
1187   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1188   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189   PetscScalar    *column_values;
1190   PetscInt       message_count,flowcontrolcount;
1191   FILE           *file;
1192 
1193   PetscFunctionBegin;
1194   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1195   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1196   nz   = A->nz + B->nz;
1197   if (!rank) {
1198     header[0] = MAT_FILE_CLASSID;
1199     header[1] = mat->rmap->N;
1200     header[2] = mat->cmap->N;
1201 
1202     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1204     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     /* get largest number of rows any processor has */
1206     rlen  = mat->rmap->n;
1207     range = mat->rmap->range;
1208     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209   } else {
1210     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211     rlen = mat->rmap->n;
1212   }
1213 
1214   /* load up the local row counts */
1215   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1216   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1217 
1218   /* store the row lengths to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     for (i=1; i<size; i++) {
1223       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1224       rlen = range[i+1] - range[i];
1225       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1235 
1236   /* load up the local column indices */
1237   nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
1238   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1240   cnt   = 0;
1241   for (i=0; i<mat->rmap->n; i++) {
1242     for (j=B->i[i]; j<B->i[i+1]; j++) {
1243       if ((col = garray[B->j[j]]) > cstart) break;
1244       column_indices[cnt++] = col;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248   }
1249   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1250 
1251   /* store the column indices to the file */
1252   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1253   if (!rank) {
1254     MPI_Status status;
1255     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1256     for (i=1; i<size; i++) {
1257       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1258       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1259       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1260       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1264   } else {
1265     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1266     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1269   }
1270   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1271 
1272   /* load up the local column values */
1273   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1274   cnt  = 0;
1275   for (i=0; i<mat->rmap->n; i++) {
1276     for (j=B->i[i]; j<B->i[i+1]; j++) {
1277       if (garray[B->j[j]] > cstart) break;
1278       column_values[cnt++] = B->a[j];
1279     }
1280     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282   }
1283   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1284 
1285   /* store the column values to the file */
1286   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1287   if (!rank) {
1288     MPI_Status status;
1289     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1290     for (i=1; i<size; i++) {
1291       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1292       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1293       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1296     }
1297     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1298   } else {
1299     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1300     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(column_values);CHKERRQ(ierr);
1305 
1306   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1307   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #include <petscdraw.h>
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank = aij->rank,size = aij->size;
1319   PetscBool         isdraw,iascii,isbinary;
1320   PetscViewer       sviewer;
1321   PetscViewerFormat format;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1327   if (iascii) {
1328     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1329     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330       MatInfo   info;
1331       PetscBool inodes;
1332 
1333       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1334       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1337       if (!inodes) {
1338         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       }
1344       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1351       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354       PetscInt inodecount,inodelimit,*inodes;
1355       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1356       if (inodes) {
1357         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1358       } else {
1359         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1360       }
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363       PetscFunctionReturn(0);
1364     }
1365   } else if (isbinary) {
1366     if (size == 1) {
1367       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1368       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1371     }
1372     PetscFunctionReturn(0);
1373   } else if (isdraw) {
1374     PetscDraw draw;
1375     PetscBool isnull;
1376     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1377     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1378   }
1379 
1380   {
1381     /* assemble the entire matrix onto first processor. */
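    /* Note: this gathers a copy of the whole matrix onto process 0, so it is only appropriate
       for matrices small enough to fit in a single process's memory. */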
1382     Mat        A;
1383     Mat_SeqAIJ *Aloc;
1384     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385     MatScalar  *a;
1386 
1387     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1388     if (!rank) {
1389       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1390     } else {
1391       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1392     }
1393     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1394     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1395     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1396     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1397     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1398 
1399     /* copy over the A part */
1400     Aloc = (Mat_SeqAIJ*)aij->A->data;
1401     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402     row  = mat->rmap->rstart;
1403     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1408     }
1409     aj = Aloc->j;
1410     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1411 
1412     /* copy over the B part */
1413     Aloc = (Mat_SeqAIJ*)aij->B->data;
1414     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1415     row  = mat->rmap->rstart;
1416     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1417     ct   = cols;
1418     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1423     }
1424     ierr = PetscFree(ct);CHKERRQ(ierr);
1425     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427     /*
1428        Every process has to participate in the drawing since the graphics waits are
1429        synchronized across all processes that share the PetscDraw object
1430     */
1431     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1432     if (!rank) {
1433       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1434     }
1435     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1436     ierr = MatDestroy(&A);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatView_MPIAIJ"
1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1444 {
1445   PetscErrorCode ierr;
1446   PetscBool      iascii,isdraw,issocket,isbinary;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1453   if (iascii || isdraw || isbinary || issocket) {
1454     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
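/*
   A minimal usage sketch (not part of this file's API): saving an MPIAIJ matrix with a binary
   viewer dispatches through MatView_MPIAIJ() and, on more than one process,
   MatView_MPIAIJ_Binary() above.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/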
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatSOR_MPIAIJ"
1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscErrorCode ierr;
1465   Vec            bb1 = 0;
1466   PetscBool      hasop;
1467 
1468   PetscFunctionBegin;
1469   if (flag == SOR_APPLY_UPPER) {
1470     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471     PetscFunctionReturn(0);
1472   }
1473 
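  /*
     A work vector bb1 (holding bb - B*x) is needed unless a single iteration is being done with a
     zero initial guess and without Eisenstat's trick; note that ~flag & SOR_ZERO_INITIAL_GUESS is
     nonzero exactly when the SOR_ZERO_INITIAL_GUESS bit is NOT set in flag.
  */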
1474   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1475     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1476   }
1477 
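  /*
     Each local sweep below first scatters the current solution so that the ghost values in
     mat->lvec are current, forms bb1 = bb - B*x_ghost with the off-diagonal part B, and then runs
     the requested SOR sweep of the diagonal block A on the purely local system.
  */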
1478   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1479     if (flag & SOR_ZERO_INITIAL_GUESS) {
1480       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481       its--;
1482     }
1483 
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_EISENSTAT) {
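    /*
       Eisenstat's trick (a sketch of what the code below does): a backward local sweep with zero
       initial guess produces xx; then bb1 = bb + ((omega-2)/omega)*D*xx + B*x_ghost is formed, a
       forward local sweep from bb1 produces xx1, and the two results are added.  In this way the
       off-diagonal part B is applied only once per call.
    */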
1528     Vec xx1;
1529 
1530     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1531     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1532 
1533     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535     if (!mat->diag) {
1536       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1537       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1538     }
1539     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1540     if (hasop) {
1541       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1542     } else {
1543       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1544     }
1545     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1546 
1547     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1548 
1549     /* local sweep */
1550     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1551     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1552     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1553   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1554 
1555   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatPermute_MPIAIJ"
1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1562 {
1563   Mat            aA,aB,Aperm;
1564   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1565   PetscScalar    *aa,*ba;
1566   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1567   PetscSF        rowsf,sf;
1568   IS             parcolp = NULL;
1569   PetscBool      done;
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1574   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1575   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1576   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1577 
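  /*
     Overall strategy: use star-forest (PetscSF) reductions to compute, for each local row and
     column, its destination global index under the permutations (rdest[], cdest[], and gcdest[]
     for the ghost columns), preallocate the permuted matrix from those destinations, and then
     insert the local values with MatSetValues() at the permuted locations.
  */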
1578   /* Invert row permutation to find out where my rows should go */
1579   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1580   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1581   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1582   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1583   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1585 
1586   /* Invert column permutation to find out where my columns should go */
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1590   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1591   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1592   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1594 
1595   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1596   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1597   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1598 
1599   /* Find out where my gcols should go */
1600   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1601   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1602   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1603   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1604   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1605   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1607   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1608 
1609   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1610   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
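  /* Count, for each local row, how many entries land in the diagonal (dnnz) versus off-diagonal
     (onnz) block of the permuted matrix: an entry is diagonal when the destination row and the
     destination column are owned by the same process. */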
1612   for (i=0; i<m; i++) {
1613     PetscInt row = rdest[i],rowner;
1614     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1615     for (j=ai[i]; j<ai[i+1]; j++) {
1616       PetscInt cowner,col = cdest[aj[j]];
1617       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1618       if (rowner == cowner) dnnz[i]++;
1619       else onnz[i]++;
1620     }
1621     for (j=bi[i]; j<bi[i+1]; j++) {
1622       PetscInt cowner,col = gcdest[bj[j]];
1623       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1624       if (rowner == cowner) dnnz[i]++;
1625       else onnz[i]++;
1626     }
1627   }
1628   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1629   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1630   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1631   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1632   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1633 
1634   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1635   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) {
1638     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1639     PetscInt j0,rowlen;
1640     rowlen = ai[i+1] - ai[i];
1641     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the number of rows m, so insert the values in batches of at most m */
1642       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1643       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1644     }
1645     rowlen = bi[i+1] - bi[i];
1646     for (j0=j=0; j<rowlen; j0=j) {
1647       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1648       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1649     }
1650   }
1651   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1652   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1654   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1655   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1657   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1658   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1659   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1660   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1661   *B = Aperm;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1668 {
1669   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1670   Mat            A    = mat->A,B = mat->B;
1671   PetscErrorCode ierr;
1672   PetscReal      isend[5],irecv[5];
1673 
1674   PetscFunctionBegin;
1675   info->block_size = 1.0;
1676   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1677 
1678   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1679   isend[3] = info->memory;  isend[4] = info->mallocs;
1680 
1681   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1682 
1683   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1684   isend[3] += info->memory;  isend[4] += info->mallocs;
1685   if (flag == MAT_LOCAL) {
1686     info->nz_used      = isend[0];
1687     info->nz_allocated = isend[1];
1688     info->nz_unneeded  = isend[2];
1689     info->memory       = isend[3];
1690     info->mallocs      = isend[4];
1691   } else if (flag == MAT_GLOBAL_MAX) {
1692     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1693 
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   } else if (flag == MAT_GLOBAL_SUM) {
1700     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1701 
1702     info->nz_used      = irecv[0];
1703     info->nz_allocated = irecv[1];
1704     info->nz_unneeded  = irecv[2];
1705     info->memory       = irecv[3];
1706     info->mallocs      = irecv[4];
1707   }
1708   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1709   info->fill_ratio_needed = 0;
1710   info->factor_mallocs    = 0;
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatSetOption_MPIAIJ"
1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1717 {
1718   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   switch (op) {
1723   case MAT_NEW_NONZERO_LOCATIONS:
1724   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1725   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1726   case MAT_KEEP_NONZERO_PATTERN:
1727   case MAT_NEW_NONZERO_LOCATION_ERR:
1728   case MAT_USE_INODES:
1729   case MAT_IGNORE_ZERO_ENTRIES:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1733     break;
1734   case MAT_ROW_ORIENTED:
1735     a->roworiented = flg;
1736 
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_NEW_DIAGONALS:
1741     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1742     break;
1743   case MAT_IGNORE_OFF_PROC_ENTRIES:
1744     a->donotstash = flg;
1745     break;
1746   case MAT_SPD:
1747     A->spd_set = PETSC_TRUE;
1748     A->spd     = flg;
1749     if (flg) {
1750       A->symmetric                  = PETSC_TRUE;
1751       A->structurally_symmetric     = PETSC_TRUE;
1752       A->symmetric_set              = PETSC_TRUE;
1753       A->structurally_symmetric_set = PETSC_TRUE;
1754     }
1755     break;
1756   case MAT_SYMMETRIC:
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_STRUCTURALLY_SYMMETRIC:
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_HERMITIAN:
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_SYMMETRY_ETERNAL:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   default:
1769     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 #undef __FUNCT__
1775 #define __FUNCT__ "MatGetRow_MPIAIJ"
1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1777 {
1778   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1779   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1780   PetscErrorCode ierr;
1781   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1782   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1783   PetscInt       *cmap,*idx_p;
1784 
1785   PetscFunctionBegin;
1786   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1787   mat->getrowactive = PETSC_TRUE;
1788 
1789   if (!mat->rowvalues && (idx || v)) {
1790     /*
1791         allocate enough space to hold information from the longest row.
1792     */
1793     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1794     PetscInt   max = 1,tmp;
1795     for (i=0; i<matin->rmap->n; i++) {
1796       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1797       if (max < tmp) max = tmp;
1798     }
1799     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1800   }
1801 
1802   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1803   lrow = row - rstart;
1804 
1805   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1806   if (!v)   {pvA = 0; pvB = 0;}
1807   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1808   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1809   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1810   nztot = nzA + nzB;
1811 
1812   cmap = mat->garray;
1813   if (v  || idx) {
1814     if (nztot) {
1815       /* Sort by increasing column numbers, assuming A and B already sorted */
1816       PetscInt imark = -1;
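      /* imark will be the number of B (off-diagonal) entries whose global column precedes the
         diagonal block; those entries come first in the merged row */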
1817       if (v) {
1818         *v = v_p = mat->rowvalues;
1819         for (i=0; i<nzB; i++) {
1820           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1821           else break;
1822         }
1823         imark = i;
1824         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1825         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1826       }
1827       if (idx) {
1828         *idx = idx_p = mat->rowindices;
1829         if (imark > -1) {
1830           for (i=0; i<imark; i++) {
1831             idx_p[i] = cmap[cworkB[i]];
1832           }
1833         } else {
1834           for (i=0; i<nzB; i++) {
1835             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1836             else break;
1837           }
1838           imark = i;
1839         }
1840         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1841         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1842       }
1843     } else {
1844       if (idx) *idx = 0;
1845       if (v)   *v   = 0;
1846     }
1847   }
1848   *nz  = nztot;
1849   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1850   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1857 {
1858   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1859 
1860   PetscFunctionBegin;
1861   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1862   aij->getrowactive = PETSC_FALSE;
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatNorm_MPIAIJ"
1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1869 {
1870   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1871   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1872   PetscErrorCode ierr;
1873   PetscInt       i,j,cstart = mat->cmap->rstart;
1874   PetscReal      sum = 0.0;
1875   MatScalar      *v;
1876 
1877   PetscFunctionBegin;
1878   if (aij->size == 1) {
1879     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1880   } else {
1881     if (type == NORM_FROBENIUS) {
1882       v = amat->a;
1883       for (i=0; i<amat->nz; i++) {
1884         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885       }
1886       v = bmat->a;
1887       for (i=0; i<bmat->nz; i++) {
1888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1889       }
1890       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1891       *norm = PetscSqrtReal(*norm);
1892     } else if (type == NORM_1) { /* max column norm */
1893       PetscReal *tmp,*tmp2;
1894       PetscInt  *jj,*garray = aij->garray;
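      /* tmp[] accumulates this process's contribution to the absolute column sums, indexed by
         global column; tmp2[] receives the global sums, of which the maximum is the 1-norm */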
1895       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1896       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1897       *norm = 0.0;
1898       v     = amat->a; jj = amat->j;
1899       for (j=0; j<amat->nz; j++) {
1900         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1901       }
1902       v = bmat->a; jj = bmat->j;
1903       for (j=0; j<bmat->nz; j++) {
1904         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1905       }
1906       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907       for (j=0; j<mat->cmap->N; j++) {
1908         if (tmp2[j] > *norm) *norm = tmp2[j];
1909       }
1910       ierr = PetscFree(tmp);CHKERRQ(ierr);
1911       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1912     } else if (type == NORM_INFINITY) { /* max row norm */
1913       PetscReal ntemp = 0.0;
1914       for (j=0; j<aij->A->rmap->n; j++) {
1915         v   = amat->a + amat->i[j];
1916         sum = 0.0;
1917         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1918           sum += PetscAbsScalar(*v); v++;
1919         }
1920         v = bmat->a + bmat->i[j];
1921         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1922           sum += PetscAbsScalar(*v); v++;
1923         }
1924         if (sum > ntemp) ntemp = sum;
1925       }
1926       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatTranspose_MPIAIJ"
1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935 {
1936   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1937   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1938   PetscErrorCode ierr;
1939   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1940   PetscInt       cstart = A->cmap->rstart,ncol;
1941   Mat            B;
1942   MatScalar      *array;
1943 
1944   PetscFunctionBegin;
1945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1946 
1947   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1948   ai = Aloc->i; aj = Aloc->j;
1949   bi = Bloc->i; bj = Bloc->j;
1950   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1952     PetscSFNode          *oloc;
1953     PETSC_UNUSED PetscSF sf;
1954 
1955     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
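    /* Preallocation of the transpose: d_nnz[] counts, for each local column of the diagonal part
       A (a row of the transpose owned by this process), its number of entries; g_nnz[] counts the
       entries per ghost column of B and is reduced through a PetscSF onto the owning processes to
       give the off-diagonal counts o_nnz[]. */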
1956     /* compute d_nnz for preallocation */
1957     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1958     for (i=0; i<ai[ma]; i++) {
1959       d_nnz[aj[i]]++;
1960       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1961     }
1962     /* compute local off-diagonal contributions */
1963     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1964     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1965     /* map those to global */
1966     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1967     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1968     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1969     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1970     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1971     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1972     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1973 
1974     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1975     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1976     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1977     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1978     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1979     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1980   } else {
1981     B    = *matout;
1982     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1983     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1984   }
1985 
1986   /* copy over the A part */
1987   array = Aloc->a;
1988   row   = A->rmap->rstart;
1989   for (i=0; i<ma; i++) {
1990     ncol = ai[i+1]-ai[i];
1991     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++;
1993     array += ncol; aj += ncol;
1994   }
1995   aj = Aloc->j;
1996   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1997 
1998   /* copy over the B part */
1999   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2000   array = Bloc->a;
2001   row   = A->rmap->rstart;
2002   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2003   cols_tmp = cols;
2004   for (i=0; i<mb; i++) {
2005     ncol = bi[i+1]-bi[i];
2006     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2007     row++;
2008     array += ncol; cols_tmp += ncol;
2009   }
2010   ierr = PetscFree(cols);CHKERRQ(ierr);
2011 
2012   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2015     *matout = B;
2016   } else {
2017     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2025 {
2026   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2027   Mat            a    = aij->A,b = aij->B;
2028   PetscErrorCode ierr;
2029   PetscInt       s1,s2,s3;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2033   if (rr) {
2034     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2035     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2036     /* Overlap communication with computation. */
2037     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2038   }
2039   if (ll) {
2040     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2041     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2042     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2043   }
2044   /* scale the diagonal block */
2045   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2046 
2047   if (rr) {
2048     /* Do a scatter end and then right scale the off-diagonal block */
2049     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2050     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2058 {
2059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatEqual_MPIAIJ"
2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2070 {
2071   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2072   Mat            a,b,c,d;
2073   PetscBool      flg;
2074   PetscErrorCode ierr;
2075 
2076   PetscFunctionBegin;
2077   a = matA->A; b = matA->B;
2078   c = matB->A; d = matB->B;
2079 
2080   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2081   if (flg) {
2082     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2083   }
2084   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatCopy_MPIAIJ"
2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2091 {
2092   PetscErrorCode ierr;
2093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2094   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2095 
2096   PetscFunctionBegin;
2097   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2098   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2099     /* because of the column compression in the off-process part of the matrix a->B,
2100        the number of columns in a->B and b->B may differ, hence we cannot call
2101        MatCopy() directly on the two parts. If need be, a copy more efficient than
2102        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2103        and then copying the submatrices */
2104     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2105   } else {
2106     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2107     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatSetUp_MPIAIJ"
2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2115 {
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 /*
2124    Computes the number of nonzeros per row needed for preallocation when X and Y
2125    have different nonzero structure.
2126 */
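/*
   For example (a hypothetical row, assuming both index lists are sorted by global column):
     row i of X has global columns {1,4,7}
     row i of Y has global columns {2,4,9}
   the merged pattern is {1,2,4,7,9}, so nnz[i] = 5; the shared column 4 is counted only once.
*/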
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2130 {
2131   PetscInt       i,j,k,nzx,nzy;
2132 
2133   PetscFunctionBegin;
2134   /* Set the number of nonzeros in the new matrix */
2135   for (i=0; i<m; i++) {
2136     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2137     nzx = xi[i+1] - xi[i];
2138     nzy = yi[i+1] - yi[i];
2139     nnz[i] = 0;
2140     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2141       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2142       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2143       nnz[i]++;
2144     }
2145     for (; k<nzy; k++) nnz[i]++;
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2154 {
2155   PetscErrorCode ierr;
2156   PetscInt       m = Y->rmap->N;
2157   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2158   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatAXPY_MPIAIJ"
2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2168 {
2169   PetscErrorCode ierr;
2170   PetscInt       i;
2171   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2172   PetscBLASInt   bnz,one=1;
2173   Mat_SeqAIJ     *x,*y;
2174 
2175   PetscFunctionBegin;
2176   if (str == SAME_NONZERO_PATTERN) {
2177     PetscScalar alpha = a;
2178     x    = (Mat_SeqAIJ*)xx->A->data;
2179     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2180     y    = (Mat_SeqAIJ*)yy->A->data;
2181     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2182     x    = (Mat_SeqAIJ*)xx->B->data;
2183     y    = (Mat_SeqAIJ*)yy->B->data;
2184     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2185     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2186     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2187   } else if (str == SUBSET_NONZERO_PATTERN) {
2188     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2189 
2190     x = (Mat_SeqAIJ*)xx->B->data;
2191     y = (Mat_SeqAIJ*)yy->B->data;
2192     if (y->xtoy && y->XtoY != xx->B) {
2193       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2194       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2195     }
2196     if (!y->xtoy) { /* get xtoy */
2197       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2198       y->XtoY = xx->B;
2199       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2200     }
2201     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2202     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2203   } else {
2204     Mat      B;
2205     PetscInt *nnz_d,*nnz_o;
2206     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2207     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2208     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2209     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2210     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2211     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2212     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2213     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2214     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2215     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2216     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2217     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2218     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2219     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatConjugate_MPIAIJ"
2228 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2229 {
2230 #if defined(PETSC_USE_COMPLEX)
2231   PetscErrorCode ierr;
2232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2233 
2234   PetscFunctionBegin;
2235   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2236   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2237 #else
2238   PetscFunctionBegin;
2239 #endif
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatRealPart_MPIAIJ"
2245 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2246 {
2247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2248   PetscErrorCode ierr;
2249 
2250   PetscFunctionBegin;
2251   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2252   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2259 {
2260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2265   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #if defined(PETSC_HAVE_PBGL)
2270 
2271 #include <boost/parallel/mpi/bsp_process_group.hpp>
2272 #include <boost/graph/distributed/ilu_default_graph.hpp>
2273 #include <boost/graph/distributed/ilu_0_block.hpp>
2274 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2275 #include <boost/graph/distributed/petsc/interface.hpp>
2276 #include <boost/multi_array.hpp>
2277 #include <boost/parallel/distributed_property_map.hpp>
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2281 /*
2282   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2283 */
2284 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2285 {
2286   namespace petsc = boost::distributed::petsc;
2287 
2288   namespace graph_dist = boost::graph::distributed;
2289   using boost::graph::distributed::ilu_default::process_group_type;
2290   using boost::graph::ilu_permuted;
2291 
2292   PetscBool      row_identity, col_identity;
2293   PetscContainer c;
2294   PetscInt       m, n, M, N;
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2299   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2300   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2301   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2302 
2303   process_group_type pg;
2304   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2305   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2306   lgraph_type& level_graph = *lgraph_p;
2307   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2308 
2309   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2310   ilu_permuted(level_graph);
2311 
2312   /* put together the new matrix */
2313   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2314   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2315   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2316   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2317   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2318   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2319   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2320   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2321 
2322   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2323   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2324   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2325   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2331 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2332 {
2333   PetscFunctionBegin;
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatSolve_MPIAIJ"
2339 /*
2340   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2341 */
2342 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2343 {
2344   namespace graph_dist = boost::graph::distributed;
2345 
2346   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2347   lgraph_type    *lgraph_p;
2348   PetscContainer c;
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2353   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2354   ierr = VecCopy(b, x);CHKERRQ(ierr);
2355 
2356   PetscScalar *array_x;
2357   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2358   PetscInt sx;
2359   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2360 
2361   PetscScalar *array_b;
2362   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2363   PetscInt sb;
2364   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2365 
2366   lgraph_type& level_graph = *lgraph_p;
2367   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2368 
2369   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2370   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2371   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2372 
2373   typedef boost::iterator_property_map<array_ref_type::iterator,
2374                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2375   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2376   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2377 
2378   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2379   PetscFunctionReturn(0);
2380 }
2381 #endif
2382 
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2386 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2387 {
2388   PetscMPIInt    rank,size;
2389   MPI_Comm       comm;
2390   PetscErrorCode ierr;
2391   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2392   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2393   PetscInt       *rowrange = mat->rmap->range;
2394   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2395   Mat            A = aij->A,B=aij->B,C=*matredundant;
2396   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2397   PetscScalar    *sbuf_a;
2398   PetscInt       nzlocal=a->nz+b->nz;
2399   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2400   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2401   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2402   MatScalar      *aworkA,*aworkB;
2403   PetscScalar    *vals;
2404   PetscMPIInt    tag1,tag2,tag3,imdex;
2405   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2406   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2407   MPI_Status     recv_status,*send_status;
2408   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2409   PetscInt       **rbuf_j=NULL;
2410   PetscScalar    **rbuf_a=NULL;
2411   Mat_Redundant  *redund =NULL;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2417   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2419 
2420   if (reuse == MAT_REUSE_MATRIX) {
2421     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2422     if (subsize == 1) {
2423       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2424       redund = c->redundant;
2425     } else {
2426       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2427       redund = c->redundant;
2428     }
2429     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2430 
2431     nsends    = redund->nsends;
2432     nrecvs    = redund->nrecvs;
2433     send_rank = redund->send_rank;
2434     recv_rank = redund->recv_rank;
2435     sbuf_nz   = redund->sbuf_nz;
2436     rbuf_nz   = redund->rbuf_nz;
2437     sbuf_j    = redund->sbuf_j;
2438     sbuf_a    = redund->sbuf_a;
2439     rbuf_j    = redund->rbuf_j;
2440     rbuf_a    = redund->rbuf_a;
2441   }
2442 
2443   if (reuse == MAT_INITIAL_MATRIX) {
2444     PetscInt    nleftover,np_subcomm;
2445 
2446     /* get the destination processors' id send_rank, nsends and nrecvs */
2447     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2448 
2449     np_subcomm = size/nsubcomm;
2450     nleftover  = size - nsubcomm*np_subcomm;
2451 
2452     /* the block of code below is specific to the INTERLACED layout */
2453     /* ------------------------------------------------*/
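    /* In the interlaced layout, subcommunicator s is made up of the global ranks
       {s, s+nsubcomm, s+2*nsubcomm, ...}, so global rank i has subrank i/nsubcomm.  Each process
       exchanges its local rows with every process that has the same subrank in the other
       subcommunicators, i.e. every i with i/nsubcomm == subrank and i != rank. */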
2454     nsends = 0; nrecvs = 0;
2455     for (i=0; i<size; i++) {
2456       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2457         send_rank[nsends++] = i;
2458         recv_rank[nrecvs++] = i;
2459       }
2460     }
2461     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2462       i = size-nleftover-1;
2463       j = 0;
2464       while (j < nsubcomm - nleftover) {
2465         send_rank[nsends++] = i;
2466         i--; j++;
2467       }
2468     }
2469 
2470     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2471       for (i=0; i<nleftover; i++) {
2472         recv_rank[nrecvs++] = size-nleftover+i;
2473       }
2474     }
2475     /*----------------------------------------------*/
2476 
2477     /* allocate sbuf_j, sbuf_a */
2478     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2479     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2480     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2481     /*
2482     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2483     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2484      */
2485   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2486 
2487   /* copy mat's local entries into the buffers */
2488   if (reuse == MAT_INITIAL_MATRIX) {
2489     rownz_max = 0;
2490     rptr      = sbuf_j;
2491     cols      = sbuf_j + rend-rstart + 1;
2492     vals      = sbuf_a;
2493     rptr[0]   = 0;
2494     for (i=0; i<rend-rstart; i++) {
2495       row    = i + rstart;
2496       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2497       ncols  = nzA + nzB;
2498       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2499       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2500       /* load the column indices for this row into cols */
2501       lwrite = 0;
2502       for (l=0; l<nzB; l++) {
2503         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2504           vals[lwrite]   = aworkB[l];
2505           cols[lwrite++] = ctmp;
2506         }
2507       }
2508       for (l=0; l<nzA; l++) {
2509         vals[lwrite]   = aworkA[l];
2510         cols[lwrite++] = cstart + cworkA[l];
2511       }
2512       for (l=0; l<nzB; l++) {
2513         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2514           vals[lwrite]   = aworkB[l];
2515           cols[lwrite++] = ctmp;
2516         }
2517       }
2518       vals     += ncols;
2519       cols     += ncols;
2520       rptr[i+1] = rptr[i] + ncols;
2521       if (rownz_max < ncols) rownz_max = ncols;
2522     }
2523     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2524   } else { /* only copy matrix values into sbuf_a */
2525     rptr    = sbuf_j;
2526     vals    = sbuf_a;
2527     rptr[0] = 0;
2528     for (i=0; i<rend-rstart; i++) {
2529       row    = i + rstart;
2530       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2531       ncols  = nzA + nzB;
2532       cworkB = b->j + b->i[i];
2533       aworkA = a->a + a->i[i];
2534       aworkB = b->a + b->i[i];
2535       lwrite = 0;
2536       for (l=0; l<nzB; l++) {
2537         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2538       }
2539       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2540       for (l=0; l<nzB; l++) {
2541         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2542       }
2543       vals     += ncols;
2544       rptr[i+1] = rptr[i] + ncols;
2545     }
2546   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2547 
2548   /* send nzlocal to others, and recv others' nzlocal */
2549   /*--------------------------------------------------*/
2550   if (reuse == MAT_INITIAL_MATRIX) {
2551     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2552 
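    /* carve the single allocation into per-round request arrays: 3*nsends send requests followed by 3*nrecvs receive requests for the three communication rounds below */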
2553     s_waits2 = s_waits3 + nsends;
2554     s_waits1 = s_waits2 + nsends;
2555     r_waits1 = s_waits1 + nsends;
2556     r_waits2 = r_waits1 + nrecvs;
2557     r_waits3 = r_waits2 + nrecvs;
2558   } else {
2559     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2560 
2561     r_waits3 = s_waits3 + nsends;
2562   }
2563 
2564   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2565   if (reuse == MAT_INITIAL_MATRIX) {
2566     /* get new tags to keep the communication clean */
2567     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2568     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2569     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2570 
2571     /* post receives of others' nzlocal */
2572     for (i=0; i<nrecvs; i++) {
2573       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2574     }
2575     /* send nzlocal to others */
2576     for (i=0; i<nsends; i++) {
2577       sbuf_nz[i] = nzlocal;
2578       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2579     }
2580     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2581     count = nrecvs;
2582     while (count) {
2583       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2584 
2585       recv_rank[imdex] = recv_status.MPI_SOURCE;
2586       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2587       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2588 
2589       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2590 
2591       rbuf_nz[imdex] += i + 2;
2592 
2593       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2594       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2595       count--;
2596     }
2597     /* wait on sends of nzlocal */
2598     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2599     /* send mat->i,j to others, and recv others' mat->i,j */
2600     /*------------------------------------------------*/
2601     for (i=0; i<nsends; i++) {
2602       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2603       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2604     }
2605     /* wait on receives of mat->i,j */
2606     /*------------------------------*/
2607     count = nrecvs;
2608     while (count) {
2609       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2610       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2611       count--;
2612     }
2613     /* wait on sends of mat->i,j */
2614     /*---------------------------*/
2615     if (nsends) {
2616       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2617     }
2618   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2619 
2620   /* post receives, send and receive mat->a */
2621   /*----------------------------------------*/
2622   for (imdex=0; imdex<nrecvs; imdex++) {
2623     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2624   }
2625   for (i=0; i<nsends; i++) {
2626     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2627   }
2628   count = nrecvs;
2629   while (count) {
2630     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2631     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2632     count--;
2633   }
2634   if (nsends) {
2635     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2636   }
2637 
2638   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2639 
2640   /* create redundant matrix */
2641   /*-------------------------*/
2642   if (reuse == MAT_INITIAL_MATRIX) {
2643     const PetscInt *range;
2644     PetscInt       rstart_sub,rend_sub,mloc_sub;
2645 
2646     /* compute rownz_max for preallocation */
2647     for (imdex=0; imdex<nrecvs; imdex++) {
2648       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2649       rptr = rbuf_j[imdex];
2650       for (i=0; i<j; i++) {
2651         ncols = rptr[i+1] - rptr[i];
2652         if (rownz_max < ncols) rownz_max = ncols;
2653       }
2654     }
2655 
2656     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2657 
2658     /* get local size of redundant matrix
2659        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2660     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2661     rstart_sub = range[nsubcomm*subrank];
2662     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2663       rend_sub = range[nsubcomm*(subrank+1)];
2664     } else {
2665       rend_sub = mat->rmap->N;
2666     }
2667     mloc_sub = rend_sub - rstart_sub;
2668 
2669     if (M == N) {
2670       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2671     } else { /* non-square matrix */
2672       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2673     }
2674     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2675     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2676     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2677     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2678   } else {
2679     C = *matredundant;
2680   }
2681 
2682   /* insert local matrix entries */
2683   rptr = sbuf_j;
2684   cols = sbuf_j + rend-rstart + 1;
2685   vals = sbuf_a;
2686   for (i=0; i<rend-rstart; i++) {
2687     row   = i + rstart;
2688     ncols = rptr[i+1] - rptr[i];
2689     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2690     vals += ncols;
2691     cols += ncols;
2692   }
2693   /* insert received matrix entries */
2694   for (imdex=0; imdex<nrecvs; imdex++) {
2695     rstart = rowrange[recv_rank[imdex]];
2696     rend   = rowrange[recv_rank[imdex]+1];
2697     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2698     rptr   = rbuf_j[imdex];
2699     cols   = rbuf_j[imdex] + rend-rstart + 1;
2700     vals   = rbuf_a[imdex];
2701     for (i=0; i<rend-rstart; i++) {
2702       row   = i + rstart;
2703       ncols = rptr[i+1] - rptr[i];
2704       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2705       vals += ncols;
2706       cols += ncols;
2707     }
2708   }
2709   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711 
2712   if (reuse == MAT_INITIAL_MATRIX) {
2713     *matredundant = C;
2714 
2715     /* create a supporting struct and attach it to C for reuse */
2716     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2717     if (subsize == 1) {
2718       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2719       c->redundant = redund;
2720     } else {
2721       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2722       c->redundant = redund;
2723     }
2724 
2725     redund->nzlocal   = nzlocal;
2726     redund->nsends    = nsends;
2727     redund->nrecvs    = nrecvs;
2728     redund->send_rank = send_rank;
2729     redund->recv_rank = recv_rank;
2730     redund->sbuf_nz   = sbuf_nz;
2731     redund->rbuf_nz   = rbuf_nz;
2732     redund->sbuf_j    = sbuf_j;
2733     redund->sbuf_a    = sbuf_a;
2734     redund->rbuf_j    = rbuf_j;
2735     redund->rbuf_a    = rbuf_a;
2736     redund->psubcomm  = NULL;
2737   }
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2743 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2744 {
2745   PetscErrorCode ierr;
2746   MPI_Comm       comm;
2747   PetscMPIInt    size,subsize;
2748   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2749   Mat_Redundant  *redund=NULL;
2750   PetscSubcomm   psubcomm=NULL;
2751   MPI_Comm       subcomm_in=subcomm;
2752   Mat            *matseq;
2753   IS             isrow,iscol;
2754 
2755   PetscFunctionBegin;
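  /* if the user passes MPI_COMM_NULL, a PetscSubcomm is created here (or retrieved on reuse); the
     PETSC_SUBCOMM_INTERLACED layout is delegated to MatGetRedundantMatrix_MPIAIJ_interlaced(), all
     other cases fall through to MatGetSubMatrices() plus MatCreateMPIAIJConcatenateSeqAIJ() below */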
2756   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2757     if (reuse ==  MAT_INITIAL_MATRIX) {
2758       /* create psubcomm, then get subcomm */
2759       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2760       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2761       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2762 
2763       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2764       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2765       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2766       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2767       subcomm = psubcomm->comm;
2768     } else { /* retrieve psubcomm and subcomm */
2769       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2770       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2771       if (subsize == 1) {
2772         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2773         redund = c->redundant;
2774       } else {
2775         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2776         redund = c->redundant;
2777       }
2778       psubcomm = redund->psubcomm;
2779     }
2780     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2781       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2782       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2783         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2784         if (subsize == 1) {
2785           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2786           c->redundant->psubcomm = psubcomm;
2787         } else {
2788           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2789           c->redundant->psubcomm = psubcomm;
2790         }
2791       }
2792       PetscFunctionReturn(0);
2793     }
2794   }
2795 
2796   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2797   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2798   if (reuse == MAT_INITIAL_MATRIX) {
2799     /* create a local sequential matrix matseq[0] */
2800     mloc_sub = PETSC_DECIDE;
2801     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2802     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2803     rstart = rend - mloc_sub;
2804     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2805     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2806   } else { /* reuse == MAT_REUSE_MATRIX */
2807     if (subsize == 1) {
2808       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2809       redund = c->redundant;
2810     } else {
2811       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2812       redund = c->redundant;
2813     }
2814 
2815     isrow  = redund->isrow;
2816     iscol  = redund->iscol;
2817     matseq = redund->matseq;
2818   }
2819   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2820   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2821 
2822   if (reuse == MAT_INITIAL_MATRIX) {
2823     /* create a supporting struct and attach it to C for reuse */
2824     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2825     if (subsize == 1) {
2826       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2827       c->redundant = redund;
2828     } else {
2829       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2830       c->redundant = redund;
2831     }
2832     redund->isrow    = isrow;
2833     redund->iscol    = iscol;
2834     redund->matseq   = matseq;
2835     redund->psubcomm = psubcomm;
2836   }
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2842 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843 {
2844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2845   PetscErrorCode ierr;
2846   PetscInt       i,*idxb = 0;
2847   PetscScalar    *va,*vb;
2848   Vec            vtmp;
2849 
2850   PetscFunctionBegin;
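  /* compute row maxima (in absolute value) of the diagonal block directly into v, then merge in the
     off-diagonal block's maxima from a temporary vector, mapping its local column indices through
     garray to the global numbering */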
2851   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2852   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2853   if (idx) {
2854     for (i=0; i<A->rmap->n; i++) {
2855       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856     }
2857   }
2858 
2859   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2860   if (idx) {
2861     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2862   }
2863   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2864   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2865 
2866   for (i=0; i<A->rmap->n; i++) {
2867     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2868       va[i] = vb[i];
2869       if (idx) idx[i] = a->garray[idxb[i]];
2870     }
2871   }
2872 
2873   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2874   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2875   ierr = PetscFree(idxb);CHKERRQ(ierr);
2876   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883 {
2884   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2885   PetscErrorCode ierr;
2886   PetscInt       i,*idxb = 0;
2887   PetscScalar    *va,*vb;
2888   Vec            vtmp;
2889 
2890   PetscFunctionBegin;
2891   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2892   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2893   if (idx) {
2894     for (i=0; i<A->rmap->n; i++) {
2895       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2896     }
2897   }
2898 
2899   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2900   if (idx) {
2901     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2902   }
2903   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2904   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2905 
2906   for (i=0; i<A->rmap->n; i++) {
2907     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2908       va[i] = vb[i];
2909       if (idx) idx[i] = a->garray[idxb[i]];
2910     }
2911   }
2912 
2913   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2914   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2915   ierr = PetscFree(idxb);CHKERRQ(ierr);
2916   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2923 {
2924   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2925   PetscInt       n      = A->rmap->n;
2926   PetscInt       cstart = A->cmap->rstart;
2927   PetscInt       *cmap  = mat->garray;
2928   PetscInt       *diagIdx, *offdiagIdx;
2929   Vec            diagV, offdiagV;
2930   PetscScalar    *a, *diagA, *offdiagA;
2931   PetscInt       r;
2932   PetscErrorCode ierr;
2933 
2934   PetscFunctionBegin;
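  /* compute the row minima of the diagonal (A) and off-diagonal (B) blocks separately, then for each
     local row keep the candidate of smaller magnitude; diagonal-block column indices are shifted by
     cstart and off-diagonal ones mapped through garray to the global numbering */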
2935   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2936   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2938   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2939   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2940   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2941   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2942   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2943   for (r = 0; r < n; ++r) {
2944     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2945       a[r]   = diagA[r];
2946       idx[r] = cstart + diagIdx[r];
2947     } else {
2948       a[r]   = offdiagA[r];
2949       idx[r] = cmap[offdiagIdx[r]];
2950     }
2951   }
2952   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2953   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2955   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2956   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2957   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2964 {
2965   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2966   PetscInt       n      = A->rmap->n;
2967   PetscInt       cstart = A->cmap->rstart;
2968   PetscInt       *cmap  = mat->garray;
2969   PetscInt       *diagIdx, *offdiagIdx;
2970   Vec            diagV, offdiagV;
2971   PetscScalar    *a, *diagA, *offdiagA;
2972   PetscInt       r;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
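  /* same merge strategy as MatGetRowMin_MPIAIJ() above, keeping the candidate of larger magnitude
     from the diagonal and off-diagonal row maxima */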
2976   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2977   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2979   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2980   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2981   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   for (r = 0; r < n; ++r) {
2985     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2986       a[r]   = diagA[r];
2987       idx[r] = cstart + diagIdx[r];
2988     } else {
2989       a[r]   = offdiagA[r];
2990       idx[r] = cmap[offdiagIdx[r]];
2991     }
2992   }
2993   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2997   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2998   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3004 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3005 {
3006   PetscErrorCode ierr;
3007   Mat            *dummy;
3008 
3009   PetscFunctionBegin;
3010   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3011   *newmat = *dummy;
3012   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3018 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3019 {
3020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3030 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3031 {
3032   PetscErrorCode ierr;
3033   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3034 
3035   PetscFunctionBegin;
3036   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3037   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3038   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 /* -------------------------------------------------------------------*/
3044 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3045                                        MatGetRow_MPIAIJ,
3046                                        MatRestoreRow_MPIAIJ,
3047                                        MatMult_MPIAIJ,
3048                                 /* 4*/ MatMultAdd_MPIAIJ,
3049                                        MatMultTranspose_MPIAIJ,
3050                                        MatMultTransposeAdd_MPIAIJ,
3051 #if defined(PETSC_HAVE_PBGL)
3052                                        MatSolve_MPIAIJ,
3053 #else
3054                                        0,
3055 #endif
3056                                        0,
3057                                        0,
3058                                 /*10*/ 0,
3059                                        0,
3060                                        0,
3061                                        MatSOR_MPIAIJ,
3062                                        MatTranspose_MPIAIJ,
3063                                 /*15*/ MatGetInfo_MPIAIJ,
3064                                        MatEqual_MPIAIJ,
3065                                        MatGetDiagonal_MPIAIJ,
3066                                        MatDiagonalScale_MPIAIJ,
3067                                        MatNorm_MPIAIJ,
3068                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3069                                        MatAssemblyEnd_MPIAIJ,
3070                                        MatSetOption_MPIAIJ,
3071                                        MatZeroEntries_MPIAIJ,
3072                                 /*24*/ MatZeroRows_MPIAIJ,
3073                                        0,
3074 #if defined(PETSC_HAVE_PBGL)
3075                                        0,
3076 #else
3077                                        0,
3078 #endif
3079                                        0,
3080                                        0,
3081                                 /*29*/ MatSetUp_MPIAIJ,
3082 #if defined(PETSC_HAVE_PBGL)
3083                                        0,
3084 #else
3085                                        0,
3086 #endif
3087                                        0,
3088                                        0,
3089                                        0,
3090                                 /*34*/ MatDuplicate_MPIAIJ,
3091                                        0,
3092                                        0,
3093                                        0,
3094                                        0,
3095                                 /*39*/ MatAXPY_MPIAIJ,
3096                                        MatGetSubMatrices_MPIAIJ,
3097                                        MatIncreaseOverlap_MPIAIJ,
3098                                        MatGetValues_MPIAIJ,
3099                                        MatCopy_MPIAIJ,
3100                                 /*44*/ MatGetRowMax_MPIAIJ,
3101                                        MatScale_MPIAIJ,
3102                                        0,
3103                                        0,
3104                                        MatZeroRowsColumns_MPIAIJ,
3105                                 /*49*/ MatSetRandom_MPIAIJ,
3106                                        0,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3111                                        0,
3112                                        MatSetUnfactored_MPIAIJ,
3113                                        MatPermute_MPIAIJ,
3114                                        0,
3115                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3116                                        MatDestroy_MPIAIJ,
3117                                        MatView_MPIAIJ,
3118                                        0,
3119                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3120                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3121                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3122                                        0,
3123                                        0,
3124                                        0,
3125                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3126                                        MatGetRowMinAbs_MPIAIJ,
3127                                        0,
3128                                        MatSetColoring_MPIAIJ,
3129                                        0,
3130                                        MatSetValuesAdifor_MPIAIJ,
3131                                 /*75*/ MatFDColoringApply_AIJ,
3132                                        0,
3133                                        0,
3134                                        0,
3135                                        MatFindZeroDiagonals_MPIAIJ,
3136                                 /*80*/ 0,
3137                                        0,
3138                                        0,
3139                                 /*83*/ MatLoad_MPIAIJ,
3140                                        0,
3141                                        0,
3142                                        0,
3143                                        0,
3144                                        0,
3145                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3146                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3147                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3148                                        MatPtAP_MPIAIJ_MPIAIJ,
3149                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3150                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3151                                        0,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                 /*99*/ 0,
3156                                        0,
3157                                        0,
3158                                        MatConjugate_MPIAIJ,
3159                                        0,
3160                                 /*104*/MatSetValuesRow_MPIAIJ,
3161                                        MatRealPart_MPIAIJ,
3162                                        MatImaginaryPart_MPIAIJ,
3163                                        0,
3164                                        0,
3165                                 /*109*/0,
3166                                        MatGetRedundantMatrix_MPIAIJ,
3167                                        MatGetRowMin_MPIAIJ,
3168                                        0,
3169                                        0,
3170                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3171                                        0,
3172                                        0,
3173                                        0,
3174                                        0,
3175                                 /*119*/0,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                        MatGetMultiProcBlock_MPIAIJ,
3180                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3181                                        MatGetColumnNorms_MPIAIJ,
3182                                        MatInvertBlockDiagonal_MPIAIJ,
3183                                        0,
3184                                        MatGetSubMatricesParallel_MPIAIJ,
3185                                 /*129*/0,
3186                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3187                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3188                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3189                                        0,
3190                                 /*134*/0,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                 /*139*/0,
3196                                        0,
3197                                        0,
3198                                        MatFDColoringSetUp_MPIXAIJ
3199 };
3200 
3201 /* ----------------------------------------------------------------------------------------*/
3202 
3203 #undef __FUNCT__
3204 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3205 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3206 {
3207   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3208   PetscErrorCode ierr;
3209 
3210   PetscFunctionBegin;
3211   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3212   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 #undef __FUNCT__
3217 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3218 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3219 {
3220   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3225   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3231 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3232 {
3233   Mat_MPIAIJ     *b;
3234   PetscErrorCode ierr;
3235 
3236   PetscFunctionBegin;
3237   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3238   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3239   b = (Mat_MPIAIJ*)B->data;
3240 
3241   if (!B->preallocated) {
3242     /* Explicitly create 2 MATSEQAIJ matrices. */
3243     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3244     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3245     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3246     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3247     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3248     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3249     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3250     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3251     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3252     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3253   }
3254 
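  /* preallocate the diagonal block A with d_nz/d_nnz and the off-diagonal block B with o_nz/o_nnz */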
3255   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3256   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3257   B->preallocated = PETSC_TRUE;
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3263 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3264 {
3265   Mat            mat;
3266   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3267   PetscErrorCode ierr;
3268 
3269   PetscFunctionBegin;
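  /* build a new matrix with the same layout and type, copy the scalar bookkeeping fields, then
     duplicate colmap/garray, the local work vector and scatter, and the two sequential blocks A and B */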
3270   *newmat = 0;
3271   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3272   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3273   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3274   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3275   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3276   a       = (Mat_MPIAIJ*)mat->data;
3277 
3278   mat->factortype   = matin->factortype;
3279   mat->assembled    = PETSC_TRUE;
3280   mat->insertmode   = NOT_SET_VALUES;
3281   mat->preallocated = PETSC_TRUE;
3282 
3283   a->size         = oldmat->size;
3284   a->rank         = oldmat->rank;
3285   a->donotstash   = oldmat->donotstash;
3286   a->roworiented  = oldmat->roworiented;
3287   a->rowindices   = 0;
3288   a->rowvalues    = 0;
3289   a->getrowactive = PETSC_FALSE;
3290 
3291   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3292   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3293 
3294   if (oldmat->colmap) {
3295 #if defined(PETSC_USE_CTABLE)
3296     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3297 #else
3298     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3299     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3300     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3301 #endif
3302   } else a->colmap = 0;
3303   if (oldmat->garray) {
3304     PetscInt len;
3305     len  = oldmat->B->cmap->n;
3306     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3307     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3308     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3309   } else a->garray = 0;
3310 
3311   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3312   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3313   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3314   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3315   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3316   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3317   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3318   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3319   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3320   *newmat = mat;
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 
3325 
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatLoad_MPIAIJ"
3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3329 {
3330   PetscScalar    *vals,*svals;
3331   MPI_Comm       comm;
3332   PetscErrorCode ierr;
3333   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3334   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3335   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3336   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3337   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3338   int            fd;
3339   PetscInt       bs = 1;
3340 
3341   PetscFunctionBegin;
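  /* rank 0 reads the header, row lengths, column indices and values from the binary viewer and ships
     each process its portion; every process then preallocates and inserts its own rows */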
3342   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3343   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3344   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3345   if (!rank) {
3346     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3347     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3348     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3349   }
3350 
3351   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3352   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3353   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3354 
3355   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3356 
3357   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3358   M    = header[1]; N = header[2];
3359   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3360   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3361   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3362 
3363   /* If global sizes are set, check if they are consistent with that given in the file */
3364   if (sizesset) {
3365     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3366   }
3367   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3368   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3369 
3370   /* determine ownership of all (block) rows */
3371   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3372   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3373   else m = newMat->rmap->n; /* Set by user */
3374 
3375   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3376   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3377 
3378   /* First process needs enough room for process with most rows */
3379   if (!rank) {
3380     mmax = rowners[1];
3381     for (i=2; i<=size; i++) {
3382       mmax = PetscMax(mmax, rowners[i]);
3383     }
3384   } else mmax = -1;             /* unused, but compilers complain */
3385 
3386   rowners[0] = 0;
3387   for (i=2; i<=size; i++) {
3388     rowners[i] += rowners[i-1];
3389   }
3390   rstart = rowners[rank];
3391   rend   = rowners[rank+1];
3392 
3393   /* distribute row lengths to all processors */
3394   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3395   if (!rank) {
3396     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3397     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3398     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3399     for (j=0; j<m; j++) {
3400       procsnz[0] += ourlens[j];
3401     }
3402     for (i=1; i<size; i++) {
3403       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3404       /* calculate the number of nonzeros on each processor */
3405       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3406         procsnz[i] += rowlengths[j];
3407       }
3408       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3409     }
3410     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3411   } else {
3412     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3413   }
3414 
3415   if (!rank) {
3416     /* determine max buffer needed and allocate it */
3417     maxnz = 0;
3418     for (i=0; i<size; i++) {
3419       maxnz = PetscMax(maxnz,procsnz[i]);
3420     }
3421     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3422 
3423     /* read in my part of the matrix column indices  */
3424     nz   = procsnz[0];
3425     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3426     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3427 
3428     /* read in everyone else's portion and ship it off */
3429     for (i=1; i<size; i++) {
3430       nz   = procsnz[i];
3431       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3432       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3433     }
3434     ierr = PetscFree(cols);CHKERRQ(ierr);
3435   } else {
3436     /* determine buffer space needed for message */
3437     nz = 0;
3438     for (i=0; i<m; i++) {
3439       nz += ourlens[i];
3440     }
3441     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3442 
3443     /* receive message of column indices */
3444     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3445   }
3446 
3447   /* determine column ownership if matrix is not square */
3448   if (N != M) {
3449     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3450     else n = newMat->cmap->n;
3451     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3452     cstart = cend - n;
3453   } else {
3454     cstart = rstart;
3455     cend   = rend;
3456     n      = cend - cstart;
3457   }
3458 
3459   /* loop over local rows, determining number of off diagonal entries */
3460   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3461   jj   = 0;
3462   for (i=0; i<m; i++) {
3463     for (j=0; j<ourlens[i]; j++) {
3464       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3465       jj++;
3466     }
3467   }
3468 
3469   for (i=0; i<m; i++) {
3470     ourlens[i] -= offlens[i];
3471   }
3472   if (!sizesset) {
3473     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3474   }
3475 
3476   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3477 
3478   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3479 
3480   for (i=0; i<m; i++) {
3481     ourlens[i] += offlens[i];
3482   }
3483 
3484   if (!rank) {
3485     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3486 
3487     /* read in my part of the matrix numerical values  */
3488     nz   = procsnz[0];
3489     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3490 
3491     /* insert into matrix */
3492     jj      = rstart;
3493     smycols = mycols;
3494     svals   = vals;
3495     for (i=0; i<m; i++) {
3496       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3497       smycols += ourlens[i];
3498       svals   += ourlens[i];
3499       jj++;
3500     }
3501 
3502     /* read in other processors and ship out */
3503     for (i=1; i<size; i++) {
3504       nz   = procsnz[i];
3505       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3506       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3507     }
3508     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3509   } else {
3510     /* receive numeric values */
3511     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3512 
3513     /* receive message of values */
3514     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3515 
3516     /* insert into matrix */
3517     jj      = rstart;
3518     smycols = mycols;
3519     svals   = vals;
3520     for (i=0; i<m; i++) {
3521       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3522       smycols += ourlens[i];
3523       svals   += ourlens[i];
3524       jj++;
3525     }
3526   }
3527   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3528   ierr = PetscFree(vals);CHKERRQ(ierr);
3529   ierr = PetscFree(mycols);CHKERRQ(ierr);
3530   ierr = PetscFree(rowners);CHKERRQ(ierr);
3531   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   PetscFunctionReturn(0);
3534 }
3535 
3536 #undef __FUNCT__
3537 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3538 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3539 {
3540   PetscErrorCode ierr;
3541   IS             iscol_local;
3542   PetscInt       csize;
3543 
3544   PetscFunctionBegin;
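  /* gather the distributed column index set onto every process, since MatGetSubMatrix_MPIAIJ_Private()
     requires a sequential iscol containing all indices; the gathered IS is cached on the new matrix so
     that MAT_REUSE_MATRIX calls can retrieve it */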
3545   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3546   if (call == MAT_REUSE_MATRIX) {
3547     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3548     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3549   } else {
3550     PetscInt cbs;
3551     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3552     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3553     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3554   }
3555   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3556   if (call == MAT_INITIAL_MATRIX) {
3557     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3558     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3559   }
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3566 /*
3567     Not great since it makes two copies of the submatrix, first a SeqAIJ
3568   locally and then the end result by concatenating the local matrices.
3569   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3570 
3571   Note: This requires a sequential iscol with all indices.
3572 */
3573 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3574 {
3575   PetscErrorCode ierr;
3576   PetscMPIInt    rank,size;
3577   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3578   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3579   PetscBool      allcolumns, colflag;
3580   Mat            M,Mreuse;
3581   MatScalar      *vwork,*aa;
3582   MPI_Comm       comm;
3583   Mat_SeqAIJ     *aij;
3584 
3585   PetscFunctionBegin;
3586   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3589 
3590   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3591   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3592   if (colflag && ncol == mat->cmap->N) {
3593     allcolumns = PETSC_TRUE;
3594   } else {
3595     allcolumns = PETSC_FALSE;
3596   }
3597   if (call ==  MAT_REUSE_MATRIX) {
3598     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3599     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3600     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3601   } else {
3602     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3603   }
3604 
3605   /*
3606       m - number of local rows
3607       n - number of columns (same on all processors)
3608       rstart - first row in new global matrix generated
3609   */
3610   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3611   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3612   if (call == MAT_INITIAL_MATRIX) {
3613     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3614     ii  = aij->i;
3615     jj  = aij->j;
3616 
3617     /*
3618         Determine the number of non-zeros in the diagonal and off-diagonal
3619         portions of the matrix in order to do correct preallocation
3620     */
3621 
3622     /* first get start and end of "diagonal" columns */
3623     if (csize == PETSC_DECIDE) {
3624       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3625       if (mglobal == n) { /* square matrix */
3626         nlocal = m;
3627       } else {
3628         nlocal = n/size + ((n % size) > rank);
3629       }
3630     } else {
3631       nlocal = csize;
3632     }
3633     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3634     rstart = rend - nlocal;
3635     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3636 
3637     /* next, compute all the lengths */
3638     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3639     olens = dlens + m;
3640     for (i=0; i<m; i++) {
3641       jend = ii[i+1] - ii[i];
3642       olen = 0;
3643       dlen = 0;
3644       for (j=0; j<jend; j++) {
3645         if (*jj < rstart || *jj >= rend) olen++;
3646         else dlen++;
3647         jj++;
3648       }
3649       olens[i] = olen;
3650       dlens[i] = dlen;
3651     }
3652     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3653     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3654     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3655     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3656     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3657     ierr = PetscFree(dlens);CHKERRQ(ierr);
3658   } else {
3659     PetscInt ml,nl;
3660 
3661     M    = *newmat;
3662     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3663     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3664     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3665     /*
3666          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3667        rather than the slower MatSetValues().
3668     */
3669     M->was_assembled = PETSC_TRUE;
3670     M->assembled     = PETSC_FALSE;
3671   }
3672   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3673   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3674   ii   = aij->i;
3675   jj   = aij->j;
3676   aa   = aij->a;
3677   for (i=0; i<m; i++) {
3678     row   = rstart + i;
3679     nz    = ii[i+1] - ii[i];
3680     cwork = jj;     jj += nz;
3681     vwork = aa;     aa += nz;
3682     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3683   }
3684 
3685   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3686   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   *newmat = M;
3688 
3689   /* save submatrix used in processor for next request */
3690   if (call ==  MAT_INITIAL_MATRIX) {
3691     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3692     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3693   }
3694   PetscFunctionReturn(0);
3695 }
3696 
3697 #undef __FUNCT__
3698 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3699 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3700 {
3701   PetscInt       m,cstart, cend,j,nnz,i,d;
3702   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3703   const PetscInt *JJ;
3704   PetscScalar    *values;
3705   PetscErrorCode ierr;
3706 
3707   PetscFunctionBegin;
3708   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3709 
3710   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3711   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3712   m      = B->rmap->n;
3713   cstart = B->cmap->rstart;
3714   cend   = B->cmap->rend;
3715   rstart = B->rmap->rstart;
3716 
3717   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3718 
3719 #if defined(PETSC_USE_DEBUG)
3720   for (i=0; i<m; i++) {
3721     nnz = Ii[i+1]- Ii[i];
3722     JJ  = J + Ii[i];
3723     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3724     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3725     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3726   }
3727 #endif
3728 
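  /* count, for each local row, how many column indices fall inside the diagonal block [cstart,cend)
     and how many fall outside, to drive the preallocation below */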
3729   for (i=0; i<m; i++) {
3730     nnz     = Ii[i+1]- Ii[i];
3731     JJ      = J + Ii[i];
3732     nnz_max = PetscMax(nnz_max,nnz);
3733     d       = 0;
3734     for (j=0; j<nnz; j++) {
3735       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3736     }
3737     d_nnz[i] = d;
3738     o_nnz[i] = nnz - d;
3739   }
3740   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3741   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3742 
3743   if (v) values = (PetscScalar*)v;
3744   else {
3745     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3746   }
3747 
3748   for (i=0; i<m; i++) {
3749     ii   = i + rstart;
3750     nnz  = Ii[i+1]- Ii[i];
3751     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3752   }
3753   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755 
3756   if (!v) {
3757     ierr = PetscFree(values);CHKERRQ(ierr);
3758   }
3759   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3760   PetscFunctionReturn(0);
3761 }
3762 
3763 #undef __FUNCT__
3764 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3765 /*@
3766    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3767    (the default parallel PETSc format).
3768 
3769    Collective on MPI_Comm
3770 
3771    Input Parameters:
3772 +  B - the matrix
3773 .  i - the indices into j for the start of each local row (starts with zero)
3774 .  j - the column indices for each local row (starts with zero)
3775 -  v - optional values in the matrix
3776 
3777    Level: developer
3778 
3779    Notes:
3780        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3781      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3782      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3783 
3784        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3785 
3786        The format which is used for the sparse matrix input is equivalent to a
3787     row-major ordering, i.e. for the following matrix, the input data expected is
3788     as shown:
3789 
3790         1 0 0
3791         2 0 3     P0
3792        -------
3793         4 5 6     P1
3794 
3795      Process0 [P0]: rows_owned=[0,1]
3796         i =  {0,1,3}  [size = nrow+1  = 2+1]
3797         j =  {0,0,2}  [size = nz = 3]
3798         v =  {1,2,3}  [size = nz = 3]
3799 
3800      Process1 [P1]: rows_owned=[2]
3801         i =  {0,3}    [size = nrow+1  = 1+1]
3802         j =  {0,1,2}  [size = nz = 3]
3803         v =  {4,5,6}  [size = nz = 3]
3804 
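     A minimal calling sketch for the layout above (illustrative only: comm, nlocalrows and the local
     arrays i, j, v are assumed to hold the values listed for the owning process; error checking is omitted):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocalrows,PETSC_DECIDE,PETSC_DECIDE,3);  /* nlocalrows is 2 on P0 and 1 on P1 */
     MatSetType(B,MATAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
     /* B is preallocated, filled, and assembled on return */
.ve
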
3805 .keywords: matrix, aij, compressed row, sparse, parallel
3806 
3807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3808           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3809 @*/
3810 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3811 {
3812   PetscErrorCode ierr;
3813 
3814   PetscFunctionBegin;
3815   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3816   PetscFunctionReturn(0);
3817 }
3818 
3819 #undef __FUNCT__
3820 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3821 /*@C
3822    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3823    (the default parallel PETSc format).  For good matrix assembly performance
3824    the user should preallocate the matrix storage by setting the parameters
3825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3826    performance can be increased by more than a factor of 50.
3827 
3828    Collective on MPI_Comm
3829 
3830    Input Parameters:
3831 +  B - the matrix
3832 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3833            (same value is used for all local rows)
3834 .  d_nnz - array containing the number of nonzeros in the various rows of the
3835            DIAGONAL portion of the local submatrix (possibly different for each row)
3836            or NULL, if d_nz is used to specify the nonzero structure.
3837            The size of this array is equal to the number of local rows, i.e 'm'.
3838            For matrices that will be factored, you must leave room for (and set)
3839            the diagonal entry even if it is zero.
3840 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3841            submatrix (same value is used for all local rows).
3842 -  o_nnz - array containing the number of nonzeros in the various rows of the
3843            OFF-DIAGONAL portion of the local submatrix (possibly different for
3844            each row) or NULL, if o_nz is used to specify the nonzero
3845            structure. The size of this array is equal to the number
3846            of local rows, i.e 'm'.
3847 
3848    If the *_nnz parameter is given then the *_nz parameter is ignored
3849 
3850    The AIJ format (also called the Yale sparse matrix format or
3851    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3852    storage.  The stored row and column indices begin with zero.
3853    See Users-Manual: ch_mat for details.
3854 
3855    The parallel matrix is partitioned such that the first m0 rows belong to
3856    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3857    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3858 
3859    The DIAGONAL portion of the local submatrix of a processor can be defined
3860    as the submatrix obtained by extracting the part corresponding to
3861    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3862    first row that belongs to the processor, r2 is the last row belonging to
3863    this processor, and c1-c2 is the range of indices of the local part of a
3864    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3865    common case of a square matrix, the row and column ranges are the same and
3866    the DIAGONAL part is also square. The remaining portion of the local
3867    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3868 
3869    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3870 
3871    You can call MatGetInfo() to get information on how effective the preallocation was;
3872    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3873    You can also run with the option -info and look for messages with the string
3874    malloc in them to see if additional memory allocation was needed.
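
   For example, a quick check of the preallocation is the following sketch (MAT_LOCAL
   reports the values for this process only; call it after the matrix has been assembled):
.vb
     MatInfo info;
     MatGetInfo(B,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve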
3875 
3876    Example usage:
3877 
3878    Consider the following 8x8 matrix with 34 non-zero values, that is
3879    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3880    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3881    as follows:
3882 
3883 .vb
3884             1  2  0  |  0  3  0  |  0  4
3885     Proc0   0  5  6  |  7  0  0  |  8  0
3886             9  0 10  | 11  0  0  | 12  0
3887     -------------------------------------
3888            13  0 14  | 15 16 17  |  0  0
3889     Proc1   0 18  0  | 19 20 21  |  0  0
3890             0  0  0  | 22 23  0  | 24  0
3891     -------------------------------------
3892     Proc2  25 26 27  |  0  0 28  | 29  0
3893            30  0  0  | 31 32 33  |  0 34
3894 .ve
3895 
3896    This can be represented as a collection of submatrices as:
3897 
3898 .vb
3899       A B C
3900       D E F
3901       G H I
3902 .ve
3903 
3904    Where the submatrices A,B,C are owned by proc0, D,E,F are
3905    owned by proc1, G,H,I are owned by proc2.
3906 
3907    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3908    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3909    The 'M','N' parameters are 8,8, and have the same values on all procs.
3910 
3911    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3912    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3913    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3914    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3915    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3916    matrix, and [DF] as another SeqAIJ matrix.
3917 
3918    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3919    allocated for every row of the local DIAGONAL submatrix, and o_nz
3920    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3921    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3922    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3923    In this case, the values of d_nz,o_nz are:
3924 .vb
3925      proc0 : d_nz = 2, o_nz = 2
3926      proc1 : d_nz = 3, o_nz = 2
3927      proc2 : d_nz = 1, o_nz = 4
3928 .ve
3929    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3930    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3931    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3932    34 values.
3933 
3934    When the d_nnz, o_nnz parameters are specified, the storage is specified
3935    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3936    In the above case the values for d_nnz,o_nnz are:
3937 .vb
3938      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3939      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3940      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3941 .ve
3942    Here the space allocated is the sum of all the above values, i.e. 34, and
3943    hence the preallocation is perfect.
3944 
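   For proc1 in the example above, the corresponding call is sketched below; the matrix B
   is assumed to have already been created with MatCreate()/MatSetSizes()/MatSetType(),
   and each process passes its own arrays in the collective call:
.vb
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);  /* d_nz and o_nz are ignored here */
.ve
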
3945    Level: intermediate
3946 
3947 .keywords: matrix, aij, compressed row, sparse, parallel
3948 
3949 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3950           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3951 @*/
3952 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3953 {
3954   PetscErrorCode ierr;
3955 
3956   PetscFunctionBegin;
3957   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3958   PetscValidType(B,1);
3959   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3960   PetscFunctionReturn(0);
3961 }
3962 
3963 #undef __FUNCT__
3964 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3965 /*@
3966      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3967          the local rows in standard CSR format.
3968 
3969    Collective on MPI_Comm
3970 
3971    Input Parameters:
3972 +  comm - MPI communicator
3973 .  m - number of local rows (Cannot be PETSC_DECIDE)
3974 .  n - This value should be the same as the local size used in creating the
3975        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3976        calculated if N is given). For square matrices n is almost always m.
3977 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3978 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3979 .   i - row indices
3980 .   j - column indices
3981 -   a - matrix values
3982 
3983    Output Parameter:
3984 .   mat - the matrix
3985 
3986    Level: intermediate
3987 
3988    Notes:
3989        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3990      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3991      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3992 
3993        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3994 
3995        The format used for the sparse matrix input is equivalent to a
3996     row-major ordering, i.e. for the following matrix, the input data expected is
3997     as shown:
3998 
3999         1 0 0
4000         2 0 3     P0
4001        -------
4002         4 5 6     P1
4003 
4004      Process0 [P0]: rows_owned=[0,1]
4005         i =  {0,1,3}  [size = nrow+1  = 2+1]
4006         j =  {0,0,2}  [size = nz = 3]
4007         v =  {1,2,3}  [size = nz = 3]
4008 
4009      Process1 [P1]: rows_owned=[2]
4010         i =  {0,3}    [size = nrow+1  = 1+1]
4011         j =  {0,1,2}  [size = nz = 3]
4012         v =  {4,5,6}  [size = nz = 3]
4013 
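     A minimal sketch of the corresponding call on process P0 follows; comm is assumed to be
     the two-process communicator of the example, and each process passes its own local arrays:
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
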
4014 .keywords: matrix, aij, compressed row, sparse, parallel
4015 
4016 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4017           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4018 @*/
4019 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4020 {
4021   PetscErrorCode ierr;
4022 
4023   PetscFunctionBegin;
4024   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4025   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4026   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4027   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4028   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4029   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4030   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 #undef __FUNCT__
4035 #define __FUNCT__ "MatCreateAIJ"
4036 /*@C
4037    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4038    (the default parallel PETSc format).  For good matrix assembly performance
4039    the user should preallocate the matrix storage by setting the parameters
4040    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4041    performance can be increased by more than a factor of 50.
4042 
4043    Collective on MPI_Comm
4044 
4045    Input Parameters:
4046 +  comm - MPI communicator
4047 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4048            This value should be the same as the local size used in creating the
4049            y vector for the matrix-vector product y = Ax.
4050 .  n - This value should be the same as the local size used in creating the
4051        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4052        calculated if N is given). For square matrices n is almost always m.
4053 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4054 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4055 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4056            (same value is used for all local rows)
4057 .  d_nnz - array containing the number of nonzeros in the various rows of the
4058            DIAGONAL portion of the local submatrix (possibly different for each row)
4059            or NULL, if d_nz is used to specify the nonzero structure.
4060            The size of this array is equal to the number of local rows, i.e 'm'.
4061 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4062            submatrix (same value is used for all local rows).
4063 -  o_nnz - array containing the number of nonzeros in the various rows of the
4064            OFF-DIAGONAL portion of the local submatrix (possibly different for
4065            each row) or NULL, if o_nz is used to specify the nonzero
4066            structure. The size of this array is equal to the number
4067            of local rows, i.e 'm'.
4068 
4069    Output Parameter:
4070 .  A - the matrix
4071 
4072    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4073    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4074    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4075 
4076    Notes:
4077    If the *_nnz parameter is given then the *_nz parameter is ignored
4078 
4079    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4080    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4081    storage requirements for this matrix.
4082 
4083    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4084    processor then it must be used on all processors that share the object for
4085    that argument.
4086 
4087    The user MUST specify either the local or global matrix dimensions
4088    (possibly both).
4089 
4090    The parallel matrix is partitioned across processors such that the
4091    first m0 rows belong to process 0, the next m1 rows belong to
4092    process 1, the next m2 rows belong to process 2, etc., where
4093    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4094    values corresponding to an [m x N] submatrix.
4095 
4096    The columns are logically partitioned with the n0 columns belonging
4097    to 0th partition, the next n1 columns belonging to the next
4098    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4099 
4100    The DIAGONAL portion of the local submatrix on any given processor
4101    is the submatrix corresponding to the rows and columns m,n
4102    owned by the given processor, i.e. the diagonal submatrix on
4103    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4104    etc. The remaining portion of the local submatrix [m x (N-n)]
4105    constitutes the OFF-DIAGONAL portion. The example below better
4106    illustrates this concept.
4107 
4108    For a square global matrix we define each processor's diagonal portion
4109    to be its local rows and the corresponding columns (a square submatrix);
4110    each processor's off-diagonal portion encompasses the remainder of the
4111    local matrix (a rectangular submatrix).
4112 
4113    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4114 
4115    When calling this routine with a single process communicator, a matrix of
4116    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4117    type of communicator, use the construction mechanism:
4118      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4119 
4120    By default, this format uses inodes (identical nodes) when possible.
4121    We search for consecutive rows with the same nonzero structure, thereby
4122    reusing matrix information to achieve increased efficiency.
4123 
4124    Options Database Keys:
4125 +  -mat_no_inode  - Do not use inodes
4126 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4127 -  -mat_aij_oneindex - Internally use indexing starting at 1
4128         rather than 0.  Note that when calling MatSetValues(),
4129         the user still MUST index entries starting at 0!
4130 
4131 
4132    Example usage:
4133 
4134    Consider the following 8x8 matrix with 34 non-zero values, that is
4135    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4136    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4137    as follows:
4138 
4139 .vb
4140             1  2  0  |  0  3  0  |  0  4
4141     Proc0   0  5  6  |  7  0  0  |  8  0
4142             9  0 10  | 11  0  0  | 12  0
4143     -------------------------------------
4144            13  0 14  | 15 16 17  |  0  0
4145     Proc1   0 18  0  | 19 20 21  |  0  0
4146             0  0  0  | 22 23  0  | 24  0
4147     -------------------------------------
4148     Proc2  25 26 27  |  0  0 28  | 29  0
4149            30  0  0  | 31 32 33  |  0 34
4150 .ve
4151 
4152    This can be represented as a collection of submatrices as:
4153 
4154 .vb
4155       A B C
4156       D E F
4157       G H I
4158 .ve
4159 
4160    Where the submatrices A,B,C are owned by proc0, D,E,F are
4161    owned by proc1, G,H,I are owned by proc2.
4162 
4163    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4164    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'M','N' parameters are 8,8, and have the same values on all procs.
4166 
4167    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4168    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4169    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4170    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4171    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4172    matrix, and [DF] as another SeqAIJ matrix.
4173 
4174    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4175    allocated for every row of the local DIAGONAL submatrix, and o_nz
4176    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4177    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4178    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4179    In this case, the values of d_nz,o_nz are:
4180 .vb
4181      proc0 : d_nz = 2, o_nz = 2
4182      proc1 : d_nz = 3, o_nz = 2
4183      proc2 : d_nz = 1, o_nz = 4
4184 .ve
4185    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4186    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4187    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4188    34 values.
4189 
4190    When the d_nnz, o_nnz parameters are specified, the storage is specified
4191    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4192    In the above case the values for d_nnz,o_nnz are:
4193 .vb
4194      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4195      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4196      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4197 .ve
4198    Here the space allocated is the sum of all the above values, i.e. 34, and
4199    hence the preallocation is perfect.
4200 
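   For instance, proc0 in the example above could create the matrix with the sketch below;
   comm is assumed to be the three-process communicator of the example, and the other
   processes make the same collective call with their own local sizes and nnz arrays:
.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
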
4201    Level: intermediate
4202 
4203 .keywords: matrix, aij, compressed row, sparse, parallel
4204 
4205 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4206           MPIAIJ, MatCreateMPIAIJWithArrays()
4207 @*/
4208 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4209 {
4210   PetscErrorCode ierr;
4211   PetscMPIInt    size;
4212 
4213   PetscFunctionBegin;
4214   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4215   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4216   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4217   if (size > 1) {
4218     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4219     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4220   } else {
4221     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4222     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4223   }
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 #undef __FUNCT__
4228 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4229 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4230 {
4231   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4232 
4233   PetscFunctionBegin;
4234   if (Ad)     *Ad     = a->A;
4235   if (Ao)     *Ao     = a->B;
4236   if (colmap) *colmap = a->garray;
4237   PetscFunctionReturn(0);
4238 }
4239 
4240 #undef __FUNCT__
4241 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4242 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4243 {
4244   PetscErrorCode ierr;
4245   PetscInt       i;
4246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4247 
4248   PetscFunctionBegin;
4249   if (coloring->ctype == IS_COLORING_GLOBAL) {
4250     ISColoringValue *allcolors,*colors;
4251     ISColoring      ocoloring;
4252 
4253     /* set coloring for diagonal portion */
4254     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4255 
4256     /* set coloring for off-diagonal portion */
4257     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4258     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4259     for (i=0; i<a->B->cmap->n; i++) {
4260       colors[i] = allcolors[a->garray[i]];
4261     }
4262     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4263     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4264     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4265     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4266   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4267     ISColoringValue *colors;
4268     PetscInt        *larray;
4269     ISColoring      ocoloring;
4270 
4271     /* set coloring for diagonal portion */
4272     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4273     for (i=0; i<a->A->cmap->n; i++) {
4274       larray[i] = i + A->cmap->rstart;
4275     }
4276     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4277     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4278     for (i=0; i<a->A->cmap->n; i++) {
4279       colors[i] = coloring->colors[larray[i]];
4280     }
4281     ierr = PetscFree(larray);CHKERRQ(ierr);
4282     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4283     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4284     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4285 
4286     /* set coloring for off-diagonal portion */
4287     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4288     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4289     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4290     for (i=0; i<a->B->cmap->n; i++) {
4291       colors[i] = coloring->colors[larray[i]];
4292     }
4293     ierr = PetscFree(larray);CHKERRQ(ierr);
4294     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4295     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4296     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4297   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4298   PetscFunctionReturn(0);
4299 }
4300 
4301 #undef __FUNCT__
4302 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4303 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4304 {
4305   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4306   PetscErrorCode ierr;
4307 
4308   PetscFunctionBegin;
4309   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4310   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4311   PetscFunctionReturn(0);
4312 }
4313 
4314 #undef __FUNCT__
4315 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4316 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4317 {
4318   PetscErrorCode ierr;
4319   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4320   PetscInt       *indx;
4321 
4322   PetscFunctionBegin;
4323   /* This routine will ONLY return MPIAIJ type matrix */
4324   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4325   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4326   if (n == PETSC_DECIDE) {
4327     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4328   }
4329   /* Check sum(n) = N */
4330   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4331   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4332 
4333   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4334   rstart -= m;
4335 
4336   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4337   for (i=0; i<m; i++) {
4338     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4339     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4340     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4341   }
4342 
4343   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4344   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4345   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4346   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4347   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4348   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4349   PetscFunctionReturn(0);
4350 }
4351 
4352 #undef __FUNCT__
4353 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4354 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4355 {
4356   PetscErrorCode ierr;
4357   PetscInt       m,N,i,rstart,nnz,Ii;
4358   PetscInt       *indx;
4359   PetscScalar    *values;
4360 
4361   PetscFunctionBegin;
4362   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4363   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4364   for (i=0; i<m; i++) {
4365     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4366     Ii   = i + rstart;
4367     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4368     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4369   }
4370   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4371   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4372   PetscFunctionReturn(0);
4373 }
4374 
4375 #undef __FUNCT__
4376 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4377 /*@
4378       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4379                  matrices from each processor
4380 
4381     Collective on MPI_Comm
4382 
4383    Input Parameters:
4384 +    comm - the communicator the parallel matrix will live on
4385 .    inmat - the input sequential matrix (one per process)
4386 .    n - number of local columns (or PETSC_DECIDE)
4387 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4388 
4389    Output Parameter:
4390 .    outmat - the parallel matrix generated
4391 
4392     Level: advanced
4393 
4394    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4395 
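   A minimal usage sketch (each process holds its own SeqAIJ matrix inmat with the same
   number of columns; the names are illustrative):
.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* ... later, after changing the values (but not the pattern) of inmat ... */
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve
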
4396 @*/
4397 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4398 {
4399   PetscErrorCode ierr;
4400   PetscMPIInt    size;
4401 
4402   PetscFunctionBegin;
4403   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4404   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4405   if (size == 1) {
4406     if (scall == MAT_INITIAL_MATRIX) {
4407       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4408     } else {
4409       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4410     }
4411   } else {
4412     if (scall == MAT_INITIAL_MATRIX) {
4413       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4414     }
4415     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4416   }
4417   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4418   PetscFunctionReturn(0);
4419 }
4420 
4421 #undef __FUNCT__
4422 #define __FUNCT__ "MatFileSplit"
4423 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4424 {
4425   PetscErrorCode    ierr;
4426   PetscMPIInt       rank;
4427   PetscInt          m,N,i,rstart,nnz;
4428   size_t            len;
4429   const PetscInt    *indx;
4430   PetscViewer       out;
4431   char              *name;
4432   Mat               B;
4433   const PetscScalar *values;
4434 
4435   PetscFunctionBegin;
4436   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4437   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4438   /* Should this be the type of the diagonal block of A? */
4439   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4440   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4441   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4442   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4443   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4444   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4445   for (i=0; i<m; i++) {
4446     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4447     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4448     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4449   }
4450   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4451   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4452 
4453   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4454   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4455   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4456   sprintf(name,"%s.%d",outfile,rank);
4457   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4458   ierr = PetscFree(name);CHKERRQ(ierr);
4459   ierr = MatView(B,out);CHKERRQ(ierr);
4460   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4461   ierr = MatDestroy(&B);CHKERRQ(ierr);
4462   PetscFunctionReturn(0);
4463 }
4464 
4465 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4466 #undef __FUNCT__
4467 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4468 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4469 {
4470   PetscErrorCode      ierr;
4471   Mat_Merge_SeqsToMPI *merge;
4472   PetscContainer      container;
4473 
4474   PetscFunctionBegin;
4475   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4476   if (container) {
4477     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4490     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4491     ierr = PetscFree(merge);CHKERRQ(ierr);
4492     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4493   }
4494   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 #include <../src/mat/utils/freespace.h>
4499 #include <petscbt.h>
4500 
4501 #undef __FUNCT__
4502 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4503 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4504 {
4505   PetscErrorCode      ierr;
4506   MPI_Comm            comm;
4507   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4508   PetscMPIInt         size,rank,taga,*len_s;
4509   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4510   PetscInt            proc,m;
4511   PetscInt            **buf_ri,**buf_rj;
4512   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4513   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4514   MPI_Request         *s_waits,*r_waits;
4515   MPI_Status          *status;
4516   MatScalar           *aa=a->a;
4517   MatScalar           **abuf_r,*ba_i;
4518   Mat_Merge_SeqsToMPI *merge;
4519   PetscContainer      container;
4520 
4521   PetscFunctionBegin;
4522   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4523   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4524 
4525   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4526   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4527 
4528   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4529   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4530 
4531   bi     = merge->bi;
4532   bj     = merge->bj;
4533   buf_ri = merge->buf_ri;
4534   buf_rj = merge->buf_rj;
4535 
4536   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4537   owners = merge->rowmap->range;
4538   len_s  = merge->len_s;
4539 
4540   /* send and recv matrix values */
4541   /*-----------------------------*/
4542   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4543   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4544 
4545   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4546   for (proc=0,k=0; proc<size; proc++) {
4547     if (!len_s[proc]) continue;
4548     i    = owners[proc];
4549     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4550     k++;
4551   }
4552 
4553   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4554   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4555   ierr = PetscFree(status);CHKERRQ(ierr);
4556 
4557   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4558   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4559 
4560   /* insert mat values of mpimat */
4561   /*----------------------------*/
4562   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4563   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4564 
4565   for (k=0; k<merge->nrecv; k++) {
4566     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4567     nrows       = *(buf_ri_k[k]);
4568     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4569     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4570   }
4571 
4572   /* set values of ba */
4573   m = merge->rowmap->n;
4574   for (i=0; i<m; i++) {
4575     arow = owners[rank] + i;
4576     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4577     bnzi = bi[i+1] - bi[i];
4578     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4579 
4580     /* add local non-zero vals of this proc's seqmat into ba */
4581     anzi   = ai[arow+1] - ai[arow];
4582     aj     = a->j + ai[arow];
4583     aa     = a->a + ai[arow];
4584     nextaj = 0;
4585     for (j=0; nextaj<anzi; j++) {
4586       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4587         ba_i[j] += aa[nextaj++];
4588       }
4589     }
4590 
4591     /* add received vals into ba */
4592     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4593       /* i-th row */
4594       if (i == *nextrow[k]) {
4595         anzi   = *(nextai[k]+1) - *nextai[k];
4596         aj     = buf_rj[k] + *(nextai[k]);
4597         aa     = abuf_r[k] + *(nextai[k]);
4598         nextaj = 0;
4599         for (j=0; nextaj<anzi; j++) {
4600           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4601             ba_i[j] += aa[nextaj++];
4602           }
4603         }
4604         nextrow[k]++; nextai[k]++;
4605       }
4606     }
4607     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4608   }
4609   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4610   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611 
4612   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4613   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4614   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4615   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4616   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4617   PetscFunctionReturn(0);
4618 }
4619 
4620 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4621 
4622 #undef __FUNCT__
4623 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4624 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4625 {
4626   PetscErrorCode      ierr;
4627   Mat                 B_mpi;
4628   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4629   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4630   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4631   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4632   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4633   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4634   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4635   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4636   MPI_Status          *status;
4637   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4638   PetscBT             lnkbt;
4639   Mat_Merge_SeqsToMPI *merge;
4640   PetscContainer      container;
4641 
4642   PetscFunctionBegin;
4643   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4644 
4645   /* make sure it is a PETSc comm */
4646   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4647   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4648   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4649 
4650   ierr = PetscNew(&merge);CHKERRQ(ierr);
4651   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4652 
4653   /* determine row ownership */
4654   /*---------------------------------------------------------*/
4655   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4656   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4657   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4658   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4659   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4660   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4661   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4662 
4663   m      = merge->rowmap->n;
4664   owners = merge->rowmap->range;
4665 
4666   /* determine the number of messages to send, their lengths */
4667   /*---------------------------------------------------------*/
4668   len_s = merge->len_s;
4669 
4670   len          = 0; /* length of buf_si[] */
4671   merge->nsend = 0;
4672   for (proc=0; proc<size; proc++) {
4673     len_si[proc] = 0;
4674     if (proc == rank) {
4675       len_s[proc] = 0;
4676     } else {
4677       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4678       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4679     }
4680     if (len_s[proc]) {
4681       merge->nsend++;
4682       nrows = 0;
4683       for (i=owners[proc]; i<owners[proc+1]; i++) {
4684         if (ai[i+1] > ai[i]) nrows++;
4685       }
4686       len_si[proc] = 2*(nrows+1);
4687       len         += len_si[proc];
4688     }
4689   }
4690 
4691   /* determine the number and length of messages to receive for ij-structure */
4692   /*-------------------------------------------------------------------------*/
4693   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4694   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4695 
4696   /* post the Irecv of j-structure */
4697   /*-------------------------------*/
4698   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4699   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4700 
4701   /* post the Isend of j-structure */
4702   /*--------------------------------*/
4703   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4704 
4705   for (proc=0, k=0; proc<size; proc++) {
4706     if (!len_s[proc]) continue;
4707     i    = owners[proc];
4708     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4709     k++;
4710   }
4711 
4712   /* receives and sends of j-structure are complete */
4713   /*------------------------------------------------*/
4714   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4715   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4716 
4717   /* send and recv i-structure */
4718   /*---------------------------*/
4719   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4720   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4721 
4722   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4723   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4724   for (proc=0,k=0; proc<size; proc++) {
4725     if (!len_s[proc]) continue;
4726     /* form outgoing message for i-structure:
4727          buf_si[0]:                 nrows to be sent
4728                [1:nrows]:           row index (local to the receiving process)
4729                [nrows+1:2*nrows+1]: i-structure index
4730     */
4731     /*-------------------------------------------*/
4732     nrows       = len_si[proc]/2 - 1;
4733     buf_si_i    = buf_si + nrows+1;
4734     buf_si[0]   = nrows;
4735     buf_si_i[0] = 0;
4736     nrows       = 0;
4737     for (i=owners[proc]; i<owners[proc+1]; i++) {
4738       anzi = ai[i+1] - ai[i];
4739       if (anzi) {
4740         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4741         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4742         nrows++;
4743       }
4744     }
4745     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4746     k++;
4747     buf_si += len_si[proc];
4748   }
4749 
4750   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4751   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4752 
4753   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4754   for (i=0; i<merge->nrecv; i++) {
4755     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4756   }
4757 
4758   ierr = PetscFree(len_si);CHKERRQ(ierr);
4759   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4760   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4761   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4762   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4763   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4764   ierr = PetscFree(status);CHKERRQ(ierr);
4765 
4766   /* compute a local seq matrix in each processor */
4767   /*----------------------------------------------*/
4768   /* allocate bi array and free space for accumulating nonzero column info */
4769   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4770   bi[0] = 0;
4771 
4772   /* create and initialize a linked list */
4773   nlnk = N+1;
4774   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4775 
4776   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4777   len  = ai[owners[rank+1]] - ai[owners[rank]];
4778   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4779 
4780   current_space = free_space;
4781 
4782   /* determine symbolic info for each local row */
4783   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4784 
4785   for (k=0; k<merge->nrecv; k++) {
4786     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4787     nrows       = *buf_ri_k[k];
4788     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4789     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4790   }
4791 
4792   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4793   len  = 0;
4794   for (i=0; i<m; i++) {
4795     bnzi = 0;
4796     /* add local non-zero cols of this proc's seqmat into lnk */
4797     arow  = owners[rank] + i;
4798     anzi  = ai[arow+1] - ai[arow];
4799     aj    = a->j + ai[arow];
4800     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4801     bnzi += nlnk;
4802     /* add received col data into lnk */
4803     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4804       if (i == *nextrow[k]) { /* i-th row */
4805         anzi  = *(nextai[k]+1) - *nextai[k];
4806         aj    = buf_rj[k] + *nextai[k];
4807         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4808         bnzi += nlnk;
4809         nextrow[k]++; nextai[k]++;
4810       }
4811     }
4812     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4813 
4814     /* if free space is not available, make more free space */
4815     if (current_space->local_remaining<bnzi) {
4816       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4817       nspacedouble++;
4818     }
4819     /* copy data into free space, then initialize lnk */
4820     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4821     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4822 
4823     current_space->array           += bnzi;
4824     current_space->local_used      += bnzi;
4825     current_space->local_remaining -= bnzi;
4826 
4827     bi[i+1] = bi[i] + bnzi;
4828   }
4829 
4830   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4831 
4832   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4833   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4834   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4835 
4836   /* create symbolic parallel matrix B_mpi */
4837   /*---------------------------------------*/
4838   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4839   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4840   if (n==PETSC_DECIDE) {
4841     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4842   } else {
4843     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4844   }
4845   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4846   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4847   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4848   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4849   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4850 
4851   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4852   B_mpi->assembled    = PETSC_FALSE;
4853   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4854   merge->bi           = bi;
4855   merge->bj           = bj;
4856   merge->buf_ri       = buf_ri;
4857   merge->buf_rj       = buf_rj;
4858   merge->coi          = NULL;
4859   merge->coj          = NULL;
4860   merge->owners_co    = NULL;
4861 
4862   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4863 
4864   /* attach the supporting struct to B_mpi for reuse */
4865   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4866   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4867   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4868   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4869   *mpimat = B_mpi;
4870 
4871   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 #undef __FUNCT__
4876 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4877 /*@C
4878       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4879                  matrices from each processor
4880 
4881     Collective on MPI_Comm
4882 
4883    Input Parameters:
4884 +    comm - the communicator the parallel matrix will live on
4885 .    seqmat - the input sequential matrix (one per process)
4886 .    m - number of local rows (or PETSC_DECIDE)
4887 .    n - number of local columns (or PETSC_DECIDE)
4888 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4889 
4890    Output Parameter:
4891 .    mpimat - the parallel matrix generated
4892 
4893     Level: advanced
4894 
4895    Notes:
4896      The dimensions of the sequential matrix in each processor MUST be the same.
4897      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4898      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
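
     A minimal usage sketch (every process holds a SeqAIJ matrix seqmat of the same global
     dimensions; the names are illustrative):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* ... later, after changing the values (but not the pattern) of seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve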
4899 @*/
4900 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4901 {
4902   PetscErrorCode ierr;
4903   PetscMPIInt    size;
4904 
4905   PetscFunctionBegin;
4906   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4907   if (size == 1) {
4908     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4909     if (scall == MAT_INITIAL_MATRIX) {
4910       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4911     } else {
4912       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4913     }
4914     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915     PetscFunctionReturn(0);
4916   }
4917   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4918   if (scall == MAT_INITIAL_MATRIX) {
4919     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4920   }
4921   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4922   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4923   PetscFunctionReturn(0);
4924 }
4925 
4926 #undef __FUNCT__
4927 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4928 /*@
4929      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4930           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4931           with MatGetSize().
4932 
4933     Not Collective
4934 
4935    Input Parameters:
4936 +    A - the matrix
4937 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4938 
4939    Output Parameter:
4940 .    A_loc - the local sequential matrix generated
4941 
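   A minimal usage sketch:
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc; if the values of A change but not its nonzero pattern ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
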
4942     Level: developer
4943 
4944 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4945 
4946 @*/
4947 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4948 {
4949   PetscErrorCode ierr;
4950   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4951   Mat_SeqAIJ     *mat,*a,*b;
4952   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4953   MatScalar      *aa,*ba,*cam;
4954   PetscScalar    *ca;
4955   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4956   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4957   PetscBool      match;
4958   MPI_Comm       comm;
4959   PetscMPIInt    size;
4960 
4961   PetscFunctionBegin;
4962   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4963   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4964   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4965   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4966   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4967 
4968   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4969   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4970   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4971   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4972   aa = a->a; ba = b->a;
4973   if (scall == MAT_INITIAL_MATRIX) {
4974     if (size == 1) {
4975       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4976       PetscFunctionReturn(0);
4977     }
4978 
4979     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4980     ci[0] = 0;
4981     for (i=0; i<am; i++) {
4982       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4983     }
4984     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4985     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4986     k    = 0;
4987     for (i=0; i<am; i++) {
4988       ncols_o = bi[i+1] - bi[i];
4989       ncols_d = ai[i+1] - ai[i];
4990       /* off-diagonal portion of A */
4991       for (jo=0; jo<ncols_o; jo++) {
4992         col = cmap[*bj];
4993         if (col >= cstart) break;
4994         cj[k]   = col; bj++;
4995         ca[k++] = *ba++;
4996       }
4997       /* diagonal portion of A */
4998       for (j=0; j<ncols_d; j++) {
4999         cj[k]   = cstart + *aj++;
5000         ca[k++] = *aa++;
5001       }
5002       /* off-diagonal portion of A */
5003       for (j=jo; j<ncols_o; j++) {
5004         cj[k]   = cmap[*bj++];
5005         ca[k++] = *ba++;
5006       }
5007     }
5008     /* put together the new matrix */
5009     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5010     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5011     /* Since these are PETSc arrays, change flags to free them as necessary. */
5012     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5013     mat->free_a  = PETSC_TRUE;
5014     mat->free_ij = PETSC_TRUE;
5015     mat->nonew   = 0;
5016   } else if (scall == MAT_REUSE_MATRIX) {
5017     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5018     ci = mat->i; cj = mat->j; cam = mat->a;
5019     for (i=0; i<am; i++) {
5020       /* off-diagonal portion of A */
5021       ncols_o = bi[i+1] - bi[i];
5022       for (jo=0; jo<ncols_o; jo++) {
5023         col = cmap[*bj];
5024         if (col >= cstart) break;
5025         *cam++ = *ba++; bj++;
5026       }
5027       /* diagonal portion of A */
5028       ncols_d = ai[i+1] - ai[i];
5029       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5030       /* off-diagonal portion of A */
5031       for (j=jo; j<ncols_o; j++) {
5032         *cam++ = *ba++; bj++;
5033       }
5034     }
5035   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5036   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5037   PetscFunctionReturn(0);
5038 }
5039 
5040 #undef __FUNCT__
5041 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5042 /*@C
5043      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5044 
5045     Not Collective
5046 
5047    Input Parameters:
5048 +    A - the matrix
5049 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 -    row, col - index sets of rows and columns to extract (or NULL)
5051 
5052    Output Parameter:
5053 .    A_loc - the local sequential matrix generated
5054 
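   A minimal usage sketch (passing NULL for row and col selects all local rows and all
   nonzero columns):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
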
5055     Level: developer
5056 
5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5058 
5059 @*/
5060 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5061 {
5062   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5063   PetscErrorCode ierr;
5064   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5065   IS             isrowa,iscola;
5066   Mat            *aloc;
5067   PetscBool      match;
5068 
5069   PetscFunctionBegin;
5070   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5071   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5072   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5073   if (!row) {
5074     start = A->rmap->rstart; end = A->rmap->rend;
5075     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5076   } else {
5077     isrowa = *row;
5078   }
5079   if (!col) {
5080     start = A->cmap->rstart;
5081     cmap  = a->garray;
5082     nzA   = a->A->cmap->n;
5083     nzB   = a->B->cmap->n;
5084     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5085     ncols = 0;
5086     for (i=0; i<nzB; i++) {
5087       if (cmap[i] < start) idx[ncols++] = cmap[i];
5088       else break;
5089     }
5090     imark = i;
5091     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5092     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5093     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5094   } else {
5095     iscola = *col;
5096   }
5097   if (scall != MAT_INITIAL_MATRIX) {
5098     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5099     aloc[0] = *A_loc;
5100   }
5101   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5102   *A_loc = aloc[0];
5103   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5104   if (!row) {
5105     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5106   }
5107   if (!col) {
5108     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5109   }
5110   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5111   PetscFunctionReturn(0);
5112 }
5113 
5114 #undef __FUNCT__
5115 #define __FUNCT__ "MatGetBrowsOfAcols"
5116 /*@C
5117     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5118 
5119     Collective on Mat
5120 
5121    Input Parameters:
5122 +    A,B - the matrices in mpiaij format
5123 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5124 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5125 
5126    Output Parameter:
5127 +    rowb, colb - index sets of rows and columns of B to extract
5128 -    B_seq - the sequential matrix generated
5129 
5130     Level: developer
5131 
5132 @*/
5133 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5134 {
5135   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5136   PetscErrorCode ierr;
5137   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5138   IS             isrowb,iscolb;
5139   Mat            *bseq=NULL;
5140 
5141   PetscFunctionBegin;
5142   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5143     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5144   }
5145   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5146 
5147   if (scall == MAT_INITIAL_MATRIX) {
5148     start = A->cmap->rstart;
5149     cmap  = a->garray;
5150     nzA   = a->A->cmap->n;
5151     nzB   = a->B->cmap->n;
5152     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5153     ncols = 0;
5154     for (i=0; i<nzB; i++) {  /* row < local row index */
5155       if (cmap[i] < start) idx[ncols++] = cmap[i];
5156       else break;
5157     }
5158     imark = i;
5159     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5160     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5161     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5162     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5163   } else {
5164     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5165     isrowb  = *rowb; iscolb = *colb;
5166     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5167     bseq[0] = *B_seq;
5168   }
5169   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5170   *B_seq = bseq[0];
5171   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5172   if (!rowb) {
5173     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5174   } else {
5175     *rowb = isrowb;
5176   }
5177   if (!colb) {
5178     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5179   } else {
5180     *colb = iscolb;
5181   }
5182   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
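
/*
   Editorial usage sketch for MatGetBrowsOfAcols() (illustration only): A and B are assumed
   to be assembled MPIAIJ matrices whose local column/row layouts match, as checked above.
   The index sets created on the first call are passed back on the reuse call and are
   destroyed by the caller when no longer needed:

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... use B_seq ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/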
5185 
5186 #undef __FUNCT__
5187 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5188 /*
5189     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5190     of the OFF-DIAGONAL portion of local A
5191 
5192     Collective on Mat
5193 
5194    Input Parameters:
5195 +    A,B - the matrices in mpiaij format
5196 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5197 
5198    Output Parameters:
5199 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5200 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5201 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5202 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5203 
5204     Level: developer
5205 
5206 */
5207 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5208 {
5209   VecScatter_MPI_General *gen_to,*gen_from;
5210   PetscErrorCode         ierr;
5211   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5212   Mat_SeqAIJ             *b_oth;
5213   VecScatter             ctx =a->Mvctx;
5214   MPI_Comm               comm;
5215   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5216   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5217   PetscScalar            *rvalues,*svalues;
5218   MatScalar              *b_otha,*bufa,*bufA;
5219   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5220   MPI_Request            *rwaits = NULL,*swaits = NULL;
5221   MPI_Status             *sstatus,rstatus;
5222   PetscMPIInt            jj,size;
5223   PetscInt               *cols,sbs,rbs;
5224   PetscScalar            *vals;
5225 
5226   PetscFunctionBegin;
5227   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5228   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5229   if (size == 1) PetscFunctionReturn(0);
5230 
5231   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5232     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5233   }
5234   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5235   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5236 
5237   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5238   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5239   rvalues  = gen_from->values; /* buffer reused to hold the lengths of the rows to be received */
5240   svalues  = gen_to->values;   /* buffer reused to hold the lengths of the rows to be sent */
5241   nrecvs   = gen_from->n;
5242   nsends   = gen_to->n;
5243 
5244   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5245   srow    = gen_to->indices;    /* local row index to be sent */
5246   sstarts = gen_to->starts;
5247   sprocs  = gen_to->procs;
5248   sstatus = gen_to->sstatus;
5249   sbs     = gen_to->bs;
5250   rstarts = gen_from->starts;
5251   rprocs  = gen_from->procs;
5252   rbs     = gen_from->bs;
5253 
5254   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5255   if (scall == MAT_INITIAL_MATRIX) {
5256     /* i-array */
5257     /*---------*/
5258     /*  post receives */
5259     for (i=0; i<nrecvs; i++) {
5260       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5261       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5262       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5263     }
5264 
5265     /* pack the outgoing message */
5266     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5267 
5268     sstartsj[0] = 0;
5269     rstartsj[0] = 0;
5270     len         = 0; /* total length of j or a array to be sent */
5271     k           = 0;
5272     for (i=0; i<nsends; i++) {
5273       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5274       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5275       for (j=0; j<nrows; j++) {
5276         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5277         for (l=0; l<sbs; l++) {
5278           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5279 
5280           rowlen[j*sbs+l] = ncols;
5281 
5282           len += ncols;
5283           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5284         }
5285         k++;
5286       }
5287       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5288 
5289       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5290     }
5291     /* recvs and sends of i-array are completed */
5292     i = nrecvs;
5293     while (i--) {
5294       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5295     }
5296     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5297 
5298     /* allocate buffers for sending j and a arrays */
5299     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5300     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5301 
5302     /* create i-array of B_oth */
5303     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5304 
5305     b_othi[0] = 0;
5306     len       = 0; /* total length of j or a array to be received */
5307     k         = 0;
5308     for (i=0; i<nrecvs; i++) {
5309       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5310       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5311       for (j=0; j<nrows; j++) {
5312         b_othi[k+1] = b_othi[k] + rowlen[j];
5313         len        += rowlen[j]; k++;
5314       }
5315       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5316     }
5317 
5318     /* allocate space for j and a arrays of B_oth */
5319     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5320     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5321 
5322     /* j-array */
5323     /*---------*/
5324     /*  post receives of j-array */
5325     for (i=0; i<nrecvs; i++) {
5326       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5327       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5328     }
5329 
5330     /* pack the outgoing message j-array */
5331     k = 0;
5332     for (i=0; i<nsends; i++) {
5333       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5334       bufJ  = bufj+sstartsj[i];
5335       for (j=0; j<nrows; j++) {
5336         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5337         for (ll=0; ll<sbs; ll++) {
5338           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5339           for (l=0; l<ncols; l++) {
5340             *bufJ++ = cols[l];
5341           }
5342           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5343         }
5344       }
5345       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5346     }
5347 
5348     /* recvs and sends of j-array are completed */
5349     i = nrecvs;
5350     while (i--) {
5351       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5352     }
5353     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5354   } else if (scall == MAT_REUSE_MATRIX) {
5355     sstartsj = *startsj_s;
5356     rstartsj = *startsj_r;
5357     bufa     = *bufa_ptr;
5358     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5359     b_otha   = b_oth->a;
5360   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5361 
5362   /* a-array */
5363   /*---------*/
5364   /*  post receives of a-array */
5365   for (i=0; i<nrecvs; i++) {
5366     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5367     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5368   }
5369 
5370   /* pack the outgoing message a-array */
5371   k = 0;
5372   for (i=0; i<nsends; i++) {
5373     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5374     bufA  = bufa+sstartsj[i];
5375     for (j=0; j<nrows; j++) {
5376       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5377       for (ll=0; ll<sbs; ll++) {
5378         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5379         for (l=0; l<ncols; l++) {
5380           *bufA++ = vals[l];
5381         }
5382         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5383       }
5384     }
5385     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5386   }
5387   /* recvs and sends of a-array are completed */
5388   i = nrecvs;
5389   while (i--) {
5390     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5391   }
5392   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5393   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5394 
5395   if (scall == MAT_INITIAL_MATRIX) {
5396     /* put together the new matrix */
5397     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5398 
5399     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5400     /* Since these are PETSc arrays, change flags to free them as necessary. */
5401     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5402     b_oth->free_a  = PETSC_TRUE;
5403     b_oth->free_ij = PETSC_TRUE;
5404     b_oth->nonew   = 0;
5405 
5406     ierr = PetscFree(bufj);CHKERRQ(ierr);
5407     if (!startsj_s || !bufa_ptr) {
5408       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5409       ierr = PetscFree(bufa);CHKERRQ(ierr);
5410     } else {
5411       *startsj_s = sstartsj;
5412       *startsj_r = rstartsj;
5413       *bufa_ptr  = bufa;
5414     }
5415   }
5416   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5417   PetscFunctionReturn(0);
5418 }
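
/*
   Editorial sketch of the intended call pattern for MatGetBrowsOfAoCols_MPIAIJ() (this is
   how the parallel matrix-matrix multiply kernels are expected to use it; shown only for
   illustration). The arrays returned on the first call are handed back on later calls so
   the communication setup is not rebuilt, and are freed by the caller at the end:

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... B's numerical values change, nonzero pattern unchanged ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/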
5419 
5420 #undef __FUNCT__
5421 #define __FUNCT__ "MatGetCommunicationStructs"
5422 /*@C
5423   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5424 
5425   Not Collective
5426 
5427   Input Parameter:
5428 . A - The matrix in mpiaij format
5429 
5430   Output Parameters:
5431 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5432 . colmap - A map from global column index to local index into lvec
5433 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5434 
5435   Level: developer
5436 
5437 @*/
5438 #if defined(PETSC_USE_CTABLE)
5439 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5440 #else
5441 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5442 #endif
5443 {
5444   Mat_MPIAIJ *a;
5445 
5446   PetscFunctionBegin;
5447   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5448   PetscValidPointer(lvec, 2);
5449   PetscValidPointer(colmap, 3);
5450   PetscValidPointer(multScatter, 4);
5451   a = (Mat_MPIAIJ*) A->data;
5452   if (lvec) *lvec = a->lvec;
5453   if (colmap) *colmap = a->colmap;
5454   if (multScatter) *multScatter = a->Mvctx;
5455   PetscFunctionReturn(0);
5456 }
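
/*
   Editorial usage sketch (illustration only): querying the communication structures of an
   assembled MATMPIAIJ matrix A. The colmap type depends on PETSC_USE_CTABLE, mirroring the
   two prototypes above; all three objects remain owned by A and are not destroyed by the
   caller.

      Vec        lvec;
      VecScatter scatter;
      PetscTable colmap;      (with PETSC_USE_CTABLE; otherwise declare PetscInt *colmap)
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/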
5457 
5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5461 
5462 #undef __FUNCT__
5463 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5464 /*
5465     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product directly is untenable
5466 
5467                n                       p                          p
5468         (              )       (              )         (                  )
5469       m (      A       )  *  n (       B      )   =   m (         C        )
5470         (              )       (              )         (                  )
5471 
5472 */
5473 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5474 {
5475   PetscErrorCode ierr;
5476   Mat            At,Bt,Ct;
5477 
5478   PetscFunctionBegin;
5479   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5480   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5481   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5482   ierr = MatDestroy(&At);CHKERRQ(ierr);
5483   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5484   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5485   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5486   PetscFunctionReturn(0);
5487 }
5488 
5489 #undef __FUNCT__
5490 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5491 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5492 {
5493   PetscErrorCode ierr;
5494   PetscInt       m=A->rmap->n,n=B->cmap->n;
5495   Mat            Cmat;
5496 
5497   PetscFunctionBegin;
5498   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5499   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5500   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5501   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5502   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5503   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5504   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5505   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5506 
5507   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5508 
5509   *C = Cmat;
5510   PetscFunctionReturn(0);
5511 }
5512 
5513 /* ----------------------------------------------------------------*/
5514 #undef __FUNCT__
5515 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5516 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5517 {
5518   PetscErrorCode ierr;
5519 
5520   PetscFunctionBegin;
5521   if (scall == MAT_INITIAL_MATRIX) {
5522     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5523     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5524     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5525   }
5526   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5527   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5528   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5529   PetscFunctionReturn(0);
5530 }
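
/*
   Editorial note and usage sketch: the three routines above are reached through the generic
   MatMatMult() interface via the composed function "MatMatMult_mpidense_mpiaij_C" (see
   MatCreate_MPIAIJ() below), so with A of type MATMPIDENSE and B of type MATMPIAIJ a user
   can simply write

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ... C is MATMPIDENSE; call again with MAT_REUSE_MATRIX after A or B values change ...
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/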
5531 
5532 #if defined(PETSC_HAVE_MUMPS)
5533 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5534 #endif
5535 #if defined(PETSC_HAVE_PASTIX)
5536 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5537 #endif
5538 #if defined(PETSC_HAVE_SUPERLU_DIST)
5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5540 #endif
5541 #if defined(PETSC_HAVE_CLIQUE)
5542 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5543 #endif
5544 
5545 /*MC
5546    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5547 
5548    Options Database Keys:
5549 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5550 
5551   Level: beginner
5552 
5553 .seealso: MatCreateAIJ()
5554 M*/
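
/*
   Editorial usage sketch (not part of the manual page above): creating a parallel AIJ matrix
   explicitly. The preallocation counts (5 nonzeros per row in the diagonal block, 2 in the
   off-diagonal block) are placeholders chosen for illustration.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...

   Replacing MatSetType() with MatSetFromOptions() lets the type be chosen at run time with
   -mat_type mpiaij (or -mat_type aij).
*/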
5555 
5556 #undef __FUNCT__
5557 #define __FUNCT__ "MatCreate_MPIAIJ"
5558 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5559 {
5560   Mat_MPIAIJ     *b;
5561   PetscErrorCode ierr;
5562   PetscMPIInt    size;
5563 
5564   PetscFunctionBegin;
5565   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5566 
5567   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5568   B->data       = (void*)b;
5569   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5570   B->assembled  = PETSC_FALSE;
5571   B->insertmode = NOT_SET_VALUES;
5572   b->size       = size;
5573 
5574   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5575 
5576   /* build cache for off array entries formed */
5577   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5578 
5579   b->donotstash  = PETSC_FALSE;
5580   b->colmap      = 0;
5581   b->garray      = 0;
5582   b->roworiented = PETSC_TRUE;
5583 
5584   /* stuff used for matrix vector multiply */
5585   b->lvec  = NULL;
5586   b->Mvctx = NULL;
5587 
5588   /* stuff for MatGetRow() */
5589   b->rowindices   = 0;
5590   b->rowvalues    = 0;
5591   b->getrowactive = PETSC_FALSE;
5592 
5593   /* flexible pointer used in CUSP/CUSPARSE classes */
5594   b->spptr = NULL;
5595 
5596 #if defined(PETSC_HAVE_MUMPS)
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5598 #endif
5599 #if defined(PETSC_HAVE_PASTIX)
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5601 #endif
5602 #if defined(PETSC_HAVE_SUPERLU_DIST)
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5604 #endif
5605 #if defined(PETSC_HAVE_CLIQUE)
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5607 #endif
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5622   PetscFunctionReturn(0);
5623 }
5624 
5625 #undef __FUNCT__
5626 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5627 /*@C
5628      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5629          and "off-diagonal" part of the matrix in CSR format.
5630 
5631    Collective on MPI_Comm
5632 
5633    Input Parameters:
5634 +  comm - MPI communicator
5635 .  m - number of local rows (Cannot be PETSC_DECIDE)
5636 .  n - number of local columns; this should be the same as the local size used in creating the
5637        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5638        calculated if N is given). For square matrices n is almost always m.
5639 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5640 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5641 .   i - row indices for "diagonal" portion of matrix
5642 .   j - column indices
5643 .   a - matrix values
5644 .   oi - row indices for "off-diagonal" portion of matrix
5645 .   oj - column indices
5646 -   oa - matrix values
5647 
5648    Output Parameter:
5649 .   mat - the matrix
5650 
5651    Level: advanced
5652 
5653    Notes:
5654        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5655        must free the arrays once the matrix has been destroyed and not before.
5656 
5657        The i and j indices are 0 based
5658 
5659        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5660 
5661        This sets local rows and cannot be used to set off-processor values.
5662 
5663        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5664        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5665        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5666        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5667        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5668        communication if it is known that only local entries will be set.
5669 
5670 .keywords: matrix, aij, compressed row, sparse, parallel
5671 
5672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5673           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5674 @*/
5675 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5676 {
5677   PetscErrorCode ierr;
5678   Mat_MPIAIJ     *maij;
5679 
5680   PetscFunctionBegin;
5681   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5682   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5683   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5684   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5685   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5686   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5687   maij = (Mat_MPIAIJ*) (*mat)->data;
5688 
5689   (*mat)->preallocated = PETSC_TRUE;
5690 
5691   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5692   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5693 
5694   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5695   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5696 
5697   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701 
5702   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5705   PetscFunctionReturn(0);
5706 }
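
/*
   Editorial illustration of the split arrays expected by MatCreateMPIAIJWithSplitArrays()
   (values chosen arbitrarily). Consider the 4x4 tridiagonal matrix

        [ 2 -1  0  0 ]
        [-1  2 -1  0 ]
        [ 0 -1  2 -1 ]
        [ 0  0 -1  2 ]

   with rows 0-1 and columns 0-1 on process 0, rows 2-3 and columns 2-3 on process 1
   (m = n = 2 on each process). As the MatCreateSeqAIJWithArrays() calls above suggest,
   column indices in j (the "diagonal" block) are local to the owned column block, while
   those in oj (the "off-diagonal" block) are global:

      process 0:  i  = {0,2,4}   j  = {0,1,0,1}   a  = {2,-1,-1,2}
                  oi = {0,0,1}   oj = {2}         oa = {-1}
      process 1:  i  = {0,2,4}   j  = {0,1,0,1}   a  = {2,-1,-1,2}
                  oi = {0,1,1}   oj = {1}         oa = {-1}

      ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/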
5707 
5708 /*
5709     Special version for direct calls from Fortran
5710 */
5711 #include <petsc-private/fortranimpl.h>
5712 
5713 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5714 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5715 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5716 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5717 #endif
5718 
5719 /* Change these macros so they can be used in a void function */
5720 #undef CHKERRQ
5721 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5722 #undef SETERRQ2
5723 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5724 #undef SETERRQ3
5725 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5726 #undef SETERRQ
5727 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5728 
5729 #undef __FUNCT__
5730 #define __FUNCT__ "matsetvaluesmpiaij_"
5731 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5732 {
5733   Mat            mat  = *mmat;
5734   PetscInt       m    = *mm, n = *mn;
5735   InsertMode     addv = *maddv;
5736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5737   PetscScalar    value;
5738   PetscErrorCode ierr;
5739 
5740   MatCheckPreallocated(mat,1);
5741   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5742 
5743 #if defined(PETSC_USE_DEBUG)
5744   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5745 #endif
5746   {
5747     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5748     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5749     PetscBool roworiented = aij->roworiented;
5750 
5751     /* Some Variables required in the macro */
5752     Mat        A                 = aij->A;
5753     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5754     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5755     MatScalar  *aa               = a->a;
5756     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5757     Mat        B                 = aij->B;
5758     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5759     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5760     MatScalar  *ba               = b->a;
5761 
5762     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5763     PetscInt  nonew = a->nonew;
5764     MatScalar *ap1,*ap2;
5765 
5766     PetscFunctionBegin;
5767     for (i=0; i<m; i++) {
5768       if (im[i] < 0) continue;
5769 #if defined(PETSC_USE_DEBUG)
5770       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5771 #endif
5772       if (im[i] >= rstart && im[i] < rend) {
5773         row      = im[i] - rstart;
5774         lastcol1 = -1;
5775         rp1      = aj + ai[row];
5776         ap1      = aa + ai[row];
5777         rmax1    = aimax[row];
5778         nrow1    = ailen[row];
5779         low1     = 0;
5780         high1    = nrow1;
5781         lastcol2 = -1;
5782         rp2      = bj + bi[row];
5783         ap2      = ba + bi[row];
5784         rmax2    = bimax[row];
5785         nrow2    = bilen[row];
5786         low2     = 0;
5787         high2    = nrow2;
5788 
5789         for (j=0; j<n; j++) {
5790           if (roworiented) value = v[i*n+j];
5791           else value = v[i+j*m];
5792           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5793           if (in[j] >= cstart && in[j] < cend) {
5794             col = in[j] - cstart;
5795             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5796           } else if (in[j] < 0) continue;
5797 #if defined(PETSC_USE_DEBUG)
5798           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5799 #endif
5800           else {
5801             if (mat->was_assembled) {
5802               if (!aij->colmap) {
5803                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5804               }
5805 #if defined(PETSC_USE_CTABLE)
5806               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5807               col--;
5808 #else
5809               col = aij->colmap[in[j]] - 1;
5810 #endif
5811               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5812                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5813                 col  =  in[j];
5814                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5815                 B     = aij->B;
5816                 b     = (Mat_SeqAIJ*)B->data;
5817                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5818                 rp2   = bj + bi[row];
5819                 ap2   = ba + bi[row];
5820                 rmax2 = bimax[row];
5821                 nrow2 = bilen[row];
5822                 low2  = 0;
5823                 high2 = nrow2;
5824                 bm    = aij->B->rmap->n;
5825                 ba    = b->a;
5826               }
5827             } else col = in[j];
5828             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5829           }
5830         }
5831       } else if (!aij->donotstash) {
5832         if (roworiented) {
5833           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5834         } else {
5835           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5836         }
5837       }
5838     }
5839   }
5840   PetscFunctionReturnVoid();
5841 }
5842 
5843