xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7034d64c92c192525a38fb0408afc37694e34045)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL. This matrix type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
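/*
   Illustrative usage (a minimal sketch, not part of the library code below): create an AIJ matrix
   and call both preallocation routines as recommended above; the call that does not match the
   communicator size is ignored, so the same code works on one or many processes.

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);                            (honors -mat_type aij etc.)
       ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Here 5 and 2 are illustrative estimates of the nonzeros per row in the diagonal and off-diagonal
   blocks; exact preallocation gives the best assembly performance.
*/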
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
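  /* first pass: count the locally owned rows whose diagonal (A) and off-diagonal (B) parts contain no nonzero values */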
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j<nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
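  /* second pass: record the global indices of the rows that are kept, i.e. rows with at least one nonzero value */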
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na; j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
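  /* accumulate this process's contributions into a work array over all global columns, then combine across processes below */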
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
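    /* turn the gathered local row counts into a running sum so rowners[] holds the global row ownership ranges */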
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
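      /* ld[i] counts the off-diagonal entries of row i that lie to the left of the diagonal block; it is stored in the Mat_MPIAIJ so a later call with MAT_REUSE_MATRIX can redistribute only the numerical values */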
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
329 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
330 holds an order-N integer array) but access is fast.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
334 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
342 #if defined (PETSC_USE_CTABLE)
343   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
344   for (i=0; i<n; i++){
345     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
346   }
347 #else
348   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
349   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
352 #endif
353   PetscFunctionReturn(0);
354 }
355 
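/*
    The two macros below insert a single value into the diagonal (A) or off-diagonal (B) SeqAIJ block
    of the local matrix: they locate the column within the row (a bisection step followed by a linear
    scan), add to or overwrite an existing entry, and otherwise shift the later entries of the row up
    to make room for the new nonzero, reallocating through MatSeqXAIJReallocateAIJ() when needed.
*/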
356 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
357 { \
358     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
359     lastcol1 = col;\
360     while (high1-low1 > 5) { \
361       t = (low1+high1)/2; \
362       if (rp1[t] > col) high1 = t; \
363       else             low1  = t; \
364     } \
365       for (_i=low1; _i<high1; _i++) { \
366         if (rp1[_i] > col) break; \
367         if (rp1[_i] == col) { \
368           if (addv == ADD_VALUES) ap1[_i] += value;   \
369           else                    ap1[_i] = value; \
370           goto a_noinsert; \
371         } \
372       }  \
373       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
374       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
375       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
376       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
377       N = nrow1++ - 1; a->nz++; high1++; \
378       /* shift up all the later entries in this row */ \
379       for (ii=N; ii>=_i; ii--) { \
380         rp1[ii+1] = rp1[ii]; \
381         ap1[ii+1] = ap1[ii]; \
382       } \
383       rp1[_i] = col;  \
384       ap1[_i] = value;  \
385       a_noinsert: ; \
386       ailen[row] = nrow1; \
387 }
388 
389 
390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
391 { \
392     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
393     lastcol2 = col;\
394     while (high2-low2 > 5) { \
395       t = (low2+high2)/2; \
396       if (rp2[t] > col) high2 = t; \
397       else             low2  = t; \
398     } \
399     for (_i=low2; _i<high2; _i++) {		\
400       if (rp2[_i] > col) break;			\
401       if (rp2[_i] == col) {			      \
402 	if (addv == ADD_VALUES) ap2[_i] += value;     \
403 	else                    ap2[_i] = value;      \
404 	goto b_noinsert;			      \
405       }						      \
406     }							      \
407     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
409     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
410     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
411     N = nrow2++ - 1; b->nz++; high2++;					\
412     /* shift up all the later entries in this row */			\
413     for (ii=N; ii>=_i; ii--) {						\
414       rp2[ii+1] = rp2[ii];						\
415       ap2[ii+1] = ap2[ii];						\
416     }									\
417     rp2[_i] = col;							\
418     ap2[_i] = value;							\
419     b_noinsert: ;								\
420     bilen[row] = nrow2;							\
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
425 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
426 {
427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
428   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
429   PetscErrorCode ierr;
430   PetscInt       l,*garray = mat->garray,diag;
431 
432   PetscFunctionBegin;
433   /* code only works for square matrices A */
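  /* v[] is expected to hold the values of the entire locally owned row ordered by increasing global column, matching the storage order used below: B (left of the diagonal block), A (diagonal block), B (right of the diagonal block) */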
434 
435   /* find size of row to the left of the diagonal part */
436   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
437   row  = row - diag;
438   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
439     if (garray[b->j[b->i[row]+l]] > diag) break;
440   }
441   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
442 
443   /* diagonal part */
444   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
445 
446   /* right of diagonal part */
447   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValues_MPIAIJ"
453 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscScalar    value;
457   PetscErrorCode ierr;
458   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
459   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
460   PetscBool      roworiented = aij->roworiented;
461 
462   /* Some Variables required in the macro */
463   Mat            A = aij->A;
464   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
465   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
466   MatScalar      *aa = a->a;
467   PetscBool      ignorezeroentries = a->ignorezeroentries;
468   Mat            B = aij->B;
469   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
470   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
471   MatScalar      *ba = b->a;
472 
473   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
474   PetscInt       nonew;
475   MatScalar      *ap1,*ap2;
476 
477   PetscFunctionBegin;
478   if (v) PetscValidScalarPointer(v,6);
479   for (i=0; i<m; i++) {
480     if (im[i] < 0) continue;
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
483 #endif
484     if (im[i] >= rstart && im[i] < rend) {
485       row      = im[i] - rstart;
486       lastcol1 = -1;
487       rp1      = aj + ai[row];
488       ap1      = aa + ai[row];
489       rmax1    = aimax[row];
490       nrow1    = ailen[row];
491       low1     = 0;
492       high1    = nrow1;
493       lastcol2 = -1;
494       rp2      = bj + bi[row];
495       ap2      = ba + bi[row];
496       rmax2    = bimax[row];
497       nrow2    = bilen[row];
498       low2     = 0;
499       high2    = nrow2;
500 
501       for (j=0; j<n; j++) {
502         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
503         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
504         if (in[j] >= cstart && in[j] < cend){
505           col = in[j] - cstart;
506           nonew = a->nonew;
507           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
508         } else if (in[j] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
511 #endif
512         else {
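          /* the column lies in the off-diagonal block B: translate the global column index to B's local numbering through colmap; if the entry is not present yet and new nonzeros are allowed, disassemble B back to global numbering so the insertion can proceed */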
513           if (mat->was_assembled) {
514             if (!aij->colmap) {
515               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
516             }
517 #if defined (PETSC_USE_CTABLE)
518             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
519 	    col--;
520 #else
521             col = aij->colmap[in[j]] - 1;
522 #endif
523             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
524               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525               col =  in[j];
526               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
527               B = aij->B;
528               b = (Mat_SeqAIJ*)B->data;
529               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
530               rp2      = bj + bi[row];
531               ap2      = ba + bi[row];
532               rmax2    = bimax[row];
533               nrow2    = bilen[row];
534               low2     = 0;
535               high2    = nrow2;
536               bm       = aij->B->rmap->n;
537               ba = b->a;
538             }
539           } else col = in[j];
540           nonew = b->nonew;
541           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
542         }
543       }
544     } else {
545       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
546       if (!aij->donotstash) {
547         mat->assembled = PETSC_FALSE;
548         if (roworiented) {
549           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatGetValues_MPIAIJ"
561 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
562 {
563   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
564   PetscErrorCode ierr;
565   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
566   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
567 
568   PetscFunctionBegin;
569   for (i=0; i<m; i++) {
570     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
571     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
572     if (idxm[i] >= rstart && idxm[i] < rend) {
573       row = idxm[i] - rstart;
574       for (j=0; j<n; j++) {
575         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
576         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
577         if (idxn[j] >= cstart && idxn[j] < cend){
578           col = idxn[j] - cstart;
579           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
580         } else {
581           if (!aij->colmap) {
582             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
583           }
584 #if defined (PETSC_USE_CTABLE)
585           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
586           col --;
587 #else
588           col = aij->colmap[idxn[j]] - 1;
589 #endif
590           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
591           else {
592             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
593           }
594         }
595       }
596     } else {
597       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
598     }
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
607 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
608 {
609   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
610   PetscErrorCode ierr;
611   PetscInt       nstash,reallocs;
612   InsertMode     addv;
613 
614   PetscFunctionBegin;
615   if (aij->donotstash || mat->nooffprocentries) {
616     PetscFunctionReturn(0);
617   }
618 
619   /* make sure all processors are either in INSERTMODE or ADDMODE */
620   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
621   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
622   mat->insertmode = addv; /* in case this processor had no cache */
623 
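  /* begin sending the stashed off-process entries to the processes that own them; they are received and inserted during MatAssemblyEnd_MPIAIJ() */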
624   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
625   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
626   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
627   PetscFunctionReturn(0);
628 }
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
632 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
636   PetscErrorCode ierr;
637   PetscMPIInt    n;
638   PetscInt       i,j,rstart,ncols,flg;
639   PetscInt       *row,*col;
640   PetscBool      other_disassembled;
641   PetscScalar    *val;
642   InsertMode     addv = mat->insertmode;
643 
644   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
645   PetscFunctionBegin;
646   if (!aij->donotstash && !mat->nooffprocentries) {
647     while (1) {
648       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
649       if (!flg) break;
650 
651       for (i=0; i<n;) {
652         /* Now identify the consecutive vals belonging to the same row */
653         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
654         if (j < n) ncols = j-i;
655         else       ncols = n-i;
656         /* Now assemble all these values with a single function call */
657         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
658         i = j;
659       }
660     }
661     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
662   }
663   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
664   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
665 
666   /* determine if any processor has disassembled; if so we must
667      also disassemble ourselves, in order that we may reassemble. */
668   /*
669      if nonzero structure of submatrix B cannot change then we know that
670      no processor disassembled thus we can skip this stuff
671   */
672   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
673     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
674     if (mat->was_assembled && !other_disassembled) {
675       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
676     }
677   }
678   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
679     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
680   }
681   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
682   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
684   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
685 
686   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
687   aij->rowvalues = 0;
688 
689   /* used by MatAXPY() */
690   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
691   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
692 
693   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
694   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
701 {
702   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
703   PetscErrorCode ierr;
704 
705   PetscFunctionBegin;
706   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
707   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatZeroRows_MPIAIJ"
713 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
714 {
715   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode    ierr;
717   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
718   PetscInt          i,*owners = A->rmap->range;
719   PetscInt          *nprocs,j,idx,nsends,row;
720   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
721   PetscInt          *rvalues,count,base,slen,*source;
722   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
723   MPI_Comm          comm = ((PetscObject)A)->comm;
724   MPI_Request       *send_waits,*recv_waits;
725   MPI_Status        recv_status,*send_status;
726   const PetscScalar *xx;
727   PetscScalar       *bb;
728 #if defined(PETSC_DEBUG)
729   PetscBool      found = PETSC_FALSE;
730 #endif
731 
732   PetscFunctionBegin;
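  /* rows[] may reference rows owned by other processes: each process determines the owner of every row it requests, ships those indices to the owners, and below zeroes only the locally owned rows it receives */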
733   /*  first count number of contributors to each processor */
734   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
735   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
736   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
737   j = 0;
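  /* the owner search resumes from the previous owner because rows[] is typically sorted; restart from the first process whenever the indices decrease */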
738   for (i=0; i<N; i++) {
739     if (lastidx > (idx = rows[i])) j = 0;
740     lastidx = idx;
741     for (; j<size; j++) {
742       if (idx >= owners[j] && idx < owners[j+1]) {
743         nprocs[2*j]++;
744         nprocs[2*j+1] = 1;
745         owner[i] = j;
746 #if defined(PETSC_DEBUG)
747         found = PETSC_TRUE;
748 #endif
749         break;
750       }
751     }
752 #if defined(PETSC_DEBUG)
753     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
754     found = PETSC_FALSE;
755 #endif
756   }
757   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
758 
759   if (A->nooffproczerorows) {
760     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
761     nrecvs = nsends;
762     nmax   = N;
763   } else {
764     /* inform other processors of number of messages and max length*/
765     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
766   }
767 
768   /* post receives:   */
769   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
770   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
771   for (i=0; i<nrecvs; i++) {
772     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
773   }
774 
775   /* do sends:
776       1) starts[i] gives the starting index in svalues for stuff going to
777          the ith processor
778   */
779   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
780   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
781   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
782   starts[0] = 0;
783   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
784   for (i=0; i<N; i++) {
785     svalues[starts[owner[i]]++] = rows[i];
786   }
787 
788   starts[0] = 0;
789   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
790   count = 0;
791   for (i=0; i<size; i++) {
792     if (nprocs[2*i+1]) {
793       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
794     }
795   }
796   ierr = PetscFree(starts);CHKERRQ(ierr);
797 
798   base = owners[rank];
799 
800   /*  wait on receives */
801   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
802   count  = nrecvs; slen = 0;
803   while (count) {
804     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
805     /* unpack receives into our local space */
806     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
807     source[imdex]  = recv_status.MPI_SOURCE;
808     lens[imdex]    = n;
809     slen          += n;
810     count--;
811   }
812   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
813 
814   /* move the data into the send scatter */
815   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
816   count = 0;
817   for (i=0; i<nrecvs; i++) {
818     values = rvalues + i*nmax;
819     for (j=0; j<lens[i]; j++) {
820       lrows[count++] = values[j] - base;
821     }
822   }
823   ierr = PetscFree(rvalues);CHKERRQ(ierr);
824   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
825   ierr = PetscFree(owner);CHKERRQ(ierr);
826   ierr = PetscFree(nprocs);CHKERRQ(ierr);
827 
828   /* fix right hand side if needed */
829   if (x && b) {
830     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
831     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
832     for (i=0; i<slen; i++) {
833       bb[lrows[i]] = diag*xx[lrows[i]];
834     }
835     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
836     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
837   }
838   /*
839         Zero the required rows. If the "diagonal block" of the matrix
840      is square and the user wishes to set the diagonal, we use separate
841      code so that MatSetValues() is not called once per diagonal entry,
842      which would allocate new memory for each insertion (many mallocs) and
843      slow things down.
844   */
845   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
846   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
848     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
849   } else if (diag != 0.0) {
850     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
851     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
852       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
853 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
854     }
855     for (i = 0; i < slen; i++) {
856       row  = lrows[i] + rstart;
857       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
858     }
859     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861   } else {
862     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(lrows);CHKERRQ(ierr);
865 
866   /* wait on sends */
867   if (nsends) {
868     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
869     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
870     ierr = PetscFree(send_status);CHKERRQ(ierr);
871   }
872   ierr = PetscFree(send_waits);CHKERRQ(ierr);
873   ierr = PetscFree(svalues);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
879 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
880 {
881   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
882   PetscErrorCode    ierr;
883   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
884   PetscInt          i,*owners = A->rmap->range;
885   PetscInt          *nprocs,j,idx,nsends;
886   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
887   PetscInt          *rvalues,count,base,slen,*source;
888   PetscInt          *lens,*lrows,*values,m;
889   MPI_Comm          comm = ((PetscObject)A)->comm;
890   MPI_Request       *send_waits,*recv_waits;
891   MPI_Status        recv_status,*send_status;
892   const PetscScalar *xx;
893   PetscScalar       *bb,*mask;
894   Vec               xmask,lmask;
895   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
896   const PetscInt    *aj, *ii,*ridx;
897   PetscScalar       *aa;
898 #if defined(PETSC_DEBUG)
899   PetscBool         found = PETSC_FALSE;
900 #endif
901 
902   PetscFunctionBegin;
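  /* the requested rows are communicated to their owners as in MatZeroRows_MPIAIJ(); the diagonal block is then handled by MatZeroRowsColumns() on l->A, and the matching columns of the off-diagonal block are zeroed with the help of a mask vector scattered to every process */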
903   /*  first count number of contributors to each processor */
904   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
905   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
906   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
907   j = 0;
908   for (i=0; i<N; i++) {
909     if (lastidx > (idx = rows[i])) j = 0;
910     lastidx = idx;
911     for (; j<size; j++) {
912       if (idx >= owners[j] && idx < owners[j+1]) {
913         nprocs[2*j]++;
914         nprocs[2*j+1] = 1;
915         owner[i] = j;
916 #if defined(PETSC_DEBUG)
917         found = PETSC_TRUE;
918 #endif
919         break;
920       }
921     }
922 #if defined(PETSC_DEBUG)
923     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
924     found = PETSC_FALSE;
925 #endif
926   }
927   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
928 
929   /* inform other processors of number of messages and max length*/
930   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
931 
932   /* post receives:   */
933   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
934   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
935   for (i=0; i<nrecvs; i++) {
936     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
937   }
938 
939   /* do sends:
940       1) starts[i] gives the starting index in svalues for stuff going to
941          the ith processor
942   */
943   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
944   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
945   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
946   starts[0] = 0;
947   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
948   for (i=0; i<N; i++) {
949     svalues[starts[owner[i]]++] = rows[i];
950   }
951 
952   starts[0] = 0;
953   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
954   count = 0;
955   for (i=0; i<size; i++) {
956     if (nprocs[2*i+1]) {
957       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
958     }
959   }
960   ierr = PetscFree(starts);CHKERRQ(ierr);
961 
962   base = owners[rank];
963 
964   /*  wait on receives */
965   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
966   count  = nrecvs; slen = 0;
967   while (count) {
968     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
969     /* unpack receives into our local space */
970     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
971     source[imdex]  = recv_status.MPI_SOURCE;
972     lens[imdex]    = n;
973     slen          += n;
974     count--;
975   }
976   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
977 
978   /* move the data into the send scatter */
979   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
980   count = 0;
981   for (i=0; i<nrecvs; i++) {
982     values = rvalues + i*nmax;
983     for (j=0; j<lens[i]; j++) {
984       lrows[count++] = values[j] - base;
985     }
986   }
987   ierr = PetscFree(rvalues);CHKERRQ(ierr);
988   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
989   ierr = PetscFree(owner);CHKERRQ(ierr);
990   ierr = PetscFree(nprocs);CHKERRQ(ierr);
991   /* lrows are the local rows to be zeroed, slen is the number of local rows */
992 
993   /* zero diagonal part of matrix */
994   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
995 
996   /* handle off diagonal part of matrix */
997   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
998   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
999   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1000   for (i=0; i<slen; i++) {
1001     bb[lrows[i]] = 1;
1002   }
1003   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1004   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1007   if (x) {
1008     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1012   }
1013   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1014 
1015   /* remove zeroed rows of off diagonal matrix */
1016   ii = aij->i;
1017   for (i=0; i<slen; i++) {
1018     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1019   }
1020 
1021   /* loop over all elements of off process part of matrix zeroing removed columns*/
1022   if (aij->compressedrow.use){
1023     m    = aij->compressedrow.nrows;
1024     ii   = aij->compressedrow.i;
1025     ridx = aij->compressedrow.rindex;
1026     for (i=0; i<m; i++){
1027       n   = ii[i+1] - ii[i];
1028       aj  = aij->j + ii[i];
1029       aa  = aij->a + ii[i];
1030 
1031       for (j=0; j<n; j++) {
1032         if (PetscAbsScalar(mask[*aj])) {
1033           if (b) bb[*ridx] -= *aa*xx[*aj];
1034           *aa        = 0.0;
1035         }
1036         aa++;
1037         aj++;
1038       }
1039       ridx++;
1040     }
1041   } else { /* do not use compressed row format */
1042     m = l->B->rmap->n;
1043     for (i=0; i<m; i++) {
1044       n   = ii[i+1] - ii[i];
1045       aj  = aij->j + ii[i];
1046       aa  = aij->a + ii[i];
1047       for (j=0; j<n; j++) {
1048         if (PetscAbsScalar(mask[*aj])) {
1049           if (b) bb[i] -= *aa*xx[*aj];
1050           *aa    = 0.0;
1051         }
1052         aa++;
1053         aj++;
1054       }
1055     }
1056   }
1057   if (x) {
1058     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1059     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1060   }
1061   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1062   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1063   ierr = PetscFree(lrows);CHKERRQ(ierr);
1064 
1065   /* wait on sends */
1066   if (nsends) {
1067     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1068     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1069     ierr = PetscFree(send_status);CHKERRQ(ierr);
1070   }
1071   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1072   ierr = PetscFree(svalues);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMult_MPIAIJ"
1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       nt;
1084 
1085   PetscFunctionBegin;
1086   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1087   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1088   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1091   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128   PetscBool      merged;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1132   /* do nondiagonal part */
1133   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1134   if (!merged) {
1135     /* send it on its way */
1136     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137     /* do local part */
1138     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1139     /* receive remote parts: note this assumes the values are not actually */
1140     /* added into yy until the next line */
1141     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   } else {
1143     /* do local part */
1144     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1145     /* send it on its way */
1146     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147     /* values actually were received in the Begin() but we need to call this nop */
1148     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 EXTERN_C_BEGIN
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1156 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1157 {
1158   MPI_Comm       comm;
1159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1160   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1161   IS             Me,Notme;
1162   PetscErrorCode ierr;
1163   PetscInt       M,N,first,last,*notme,i;
1164   PetscMPIInt    size;
1165 
1166   PetscFunctionBegin;
1167 
1168   /* Easy test: symmetric diagonal block */
1169   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1170   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1171   if (!*f) PetscFunctionReturn(0);
1172   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1174   if (size == 1) PetscFunctionReturn(0);
1175 
1176   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1177   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1178   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1179   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1180   for (i=0; i<first; i++) notme[i] = i;
1181   for (i=last; i<M; i++) notme[i-last+first] = i;
1182   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1183   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1184   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1185   Aoff = Aoffs[0];
1186   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1187   Boff = Boffs[0];
1188   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1189   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1191   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1193   ierr = PetscFree(notme);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   /* do nondiagonal part */
1207   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1208   /* send it on its way */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   /* do local part */
1211   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1212   /* receive remote parts */
1213   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /*
1218   This only works correctly for square matrices where the subblock A->A is the
1219    diagonal block
1220 */
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1224 {
1225   PetscErrorCode ierr;
1226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1227 
1228   PetscFunctionBegin;
1229   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1230   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1231   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatScale_MPIAIJ"
1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1238 {
1239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1244   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatDestroy_MPIAIJ"
1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1251 {
1252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256 #if defined(PETSC_USE_LOG)
1257   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1258 #endif
1259   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1263 #if defined (PETSC_USE_CTABLE)
1264   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1265 #else
1266   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1267 #endif
1268   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1269   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1270   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1271   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1272   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1273   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1274 
1275   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode    ierr;
1295   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1296   int               fd;
1297   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1299   PetscScalar       *column_values;
1300   PetscInt          message_count,flowcontrolcount;
1301 
1302   PetscFunctionBegin;
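  /* file layout: header (classid, M, N, global nz), then all row lengths, all column indices, and all numerical values; rank 0 writes its own data and receives each other process's block in turn, with the viewer flow control limiting the number of outstanding messages */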
1303   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1304   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1305   nz   = A->nz + B->nz;
1306   if (!rank) {
1307     header[0] = MAT_FILE_CLASSID;
1308     header[1] = mat->rmap->N;
1309     header[2] = mat->cmap->N;
1310     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1311     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1312     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     /* get largest number of rows any processor has */
1314     rlen = mat->rmap->n;
1315     range = mat->rmap->range;
1316     for (i=1; i<size; i++) {
1317       rlen = PetscMax(rlen,range[i+1] - range[i]);
1318     }
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) {
1327     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1328   }
1329 
1330   /* store the row lengths to the file */
1331   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1332   if (!rank) {
1333     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     for (i=1; i<size; i++) {
1335       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1336       rlen = range[i+1] - range[i];
1337       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1338       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     }
1340     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1343     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1347 
1348   /* load up the local column indices */
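  /* Within each row the indices come out in globally increasing column order: first the
     off-diagonal entries whose global column (garray[]) lies below cstart, then the
     diagonal-block entries shifted by cstart, then the remaining off-diagonal entries;
     garray[] is kept sorted, so this three-way split is sufficient. */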
1349   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1350   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1351   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1352   cnt  = 0;
1353   for (i=0; i<mat->rmap->n; i++) {
1354     for (j=B->i[i]; j<B->i[i+1]; j++) {
1355       if ( (col = garray[B->j[j]]) > cstart) break;
1356       column_indices[cnt++] = col;
1357     }
1358     for (k=A->i[i]; k<A->i[i+1]; k++) {
1359       column_indices[cnt++] = A->j[k] + cstart;
1360     }
1361     for (; j<B->i[i+1]; j++) {
1362       column_indices[cnt++] = garray[B->j[j]];
1363     }
1364   }
1365   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1366 
1367   /* store the column indices to the file */
1368   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1369   if (!rank) {
1370     MPI_Status status;
1371     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     for (i=1; i<size; i++) {
1373       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1374       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1375       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1376       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1378     }
1379     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1380   } else {
1381     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1382     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1385   }
1386   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1387 
1388   /* load up the local column values */
1389   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1390   cnt  = 0;
1391   for (i=0; i<mat->rmap->n; i++) {
1392     for (j=B->i[i]; j<B->i[i+1]; j++) {
1393       if ( garray[B->j[j]] > cstart) break;
1394       column_values[cnt++] = B->a[j];
1395     }
1396     for (k=A->i[i]; k<A->i[i+1]; k++) {
1397       column_values[cnt++] = A->a[k];
1398     }
1399     for (; j<B->i[i+1]; j++) {
1400       column_values[cnt++] = B->a[j];
1401     }
1402   }
1403   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1404 
1405   /* store the column values to the file */
1406   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1407   if (!rank) {
1408     MPI_Status status;
1409     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1410     for (i=1; i<size; i++) {
1411       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1412       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1413       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1414       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1415       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1418   } else {
1419     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1420     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1421     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1423   }
1424   ierr = PetscFree(column_values);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
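/*
   A minimal, hedged usage sketch (not part of this file): the binary path above is normally
   reached through MatView() with a binary viewer; the file name "matrix.dat" is illustrative only.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);            /* dispatches to MatView_MPIAIJ_Binary() for MPIAIJ */
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/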
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1431 {
1432   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1433   PetscErrorCode    ierr;
1434   PetscMPIInt       rank = aij->rank,size = aij->size;
1435   PetscBool         isdraw,iascii,isbinary;
1436   PetscViewer       sviewer;
1437   PetscViewerFormat format;
1438 
1439   PetscFunctionBegin;
1440   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1441   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1442   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1443   if (iascii) {
1444     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1445     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1446       MatInfo    info;
1447       PetscBool  inodes;
1448 
1449       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1450       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1451       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1452       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1453       if (!inodes) {
1454         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1455 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1456       } else {
1457         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1458 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1459       }
1460       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1462       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1464       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1467       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1468       PetscFunctionReturn(0);
1469     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1470       PetscInt   inodecount,inodelimit,*inodes;
1471       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1472       if (inodes) {
1473         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1474       } else {
1475         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1476       }
1477       PetscFunctionReturn(0);
1478     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1479       PetscFunctionReturn(0);
1480     }
1481   } else if (isbinary) {
1482     if (size == 1) {
1483       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1484       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1485     } else {
1486       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1487     }
1488     PetscFunctionReturn(0);
1489   } else if (isdraw) {
1490     PetscDraw  draw;
1491     PetscBool  isnull;
1492     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1493     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1494   }
1495 
1496   if (size == 1) {
1497     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499   } else {
1500     /* assemble the entire matrix onto first processor. */
1501     Mat         A;
1502     Mat_SeqAIJ  *Aloc;
1503     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1504     MatScalar   *a;
1505 
1506     if (mat->rmap->N > 1024) {
1507       PetscBool  flg = PETSC_FALSE;
1508 
1509       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1510       if (!flg) {
1511         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1512       }
1513     }
1514 
1515     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1516     if (!rank) {
1517       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1518     } else {
1519       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1520     }
1521     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1522     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1523     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1524     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1525     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1526 
1527     /* copy over the A part */
1528     Aloc = (Mat_SeqAIJ*)aij->A->data;
1529     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1530     row = mat->rmap->rstart;
1531     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1532     for (i=0; i<m; i++) {
1533       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1534       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1535     }
1536     aj = Aloc->j;
1537     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1538 
1539     /* copy over the B part */
1540     Aloc = (Mat_SeqAIJ*)aij->B->data;
1541     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1544     ct   = cols;
1545     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1546     for (i=0; i<m; i++) {
1547       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1548       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1549     }
1550     ierr = PetscFree(ct);CHKERRQ(ierr);
1551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     /*
1554        Everyone has to participate in drawing the matrix since the graphics waits are
1555        synchronized across all processes that share the PetscDraw object
1556     */
1557     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1558     if (!rank) {
1559       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1560       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1561       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1562       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1563     }
1564     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1565     ierr = MatDestroy(&A);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
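/*
   A minimal, hedged usage sketch: the ASCII branch above is what a caller reaches with, e.g.,

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   A full ASCII dump of a matrix with more than 1024 rows additionally requires the option
   -mat_ascii_output_large, as enforced above.
*/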
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatView_MPIAIJ"
1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1573 {
1574   PetscErrorCode ierr;
1575   PetscBool      iascii,isdraw,issocket,isbinary;
1576 
1577   PetscFunctionBegin;
1578   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1579   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1580   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1581   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1582   if (iascii || isdraw || isbinary || issocket) {
1583     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1584   } else {
1585     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSOR_MPIAIJ"
1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1593 {
1594   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1595   PetscErrorCode ierr;
1596   Vec            bb1 = 0;
1597   PetscBool      hasop;
1598 
1599   PetscFunctionBegin;
1600   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1601     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1602   }
1603 
1604   if (flag == SOR_APPLY_UPPER) {
1605     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1606     PetscFunctionReturn(0);
1607   }
1608 
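  /*
     Note on the local sweeps below: writing the parallel matrix row block as [A B], with A the
     on-process (diagonal) block and B the off-process block, each iteration scatters the current
     x into the ghosted vector lvec, forms the updated right-hand side bb1 = bb - B*lvec, and then
     applies the requested sequential SOR sweep of A to bb1; this is a process-block splitting,
     not a true parallel SOR.
  */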
1609   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1610     if (flag & SOR_ZERO_INITIAL_GUESS) {
1611       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1612       its--;
1613     }
1614 
1615     while (its--) {
1616       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618 
1619       /* update rhs: bb1 = bb - B*x */
1620       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1621       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1622 
1623       /* local sweep */
1624       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1625     }
1626   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1627     if (flag & SOR_ZERO_INITIAL_GUESS) {
1628       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1629       its--;
1630     }
1631     while (its--) {
1632       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634 
1635       /* update rhs: bb1 = bb - B*x */
1636       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1637       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1638 
1639       /* local sweep */
1640       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1641     }
1642   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1643     if (flag & SOR_ZERO_INITIAL_GUESS) {
1644       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1645       its--;
1646     }
1647     while (its--) {
1648       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1649       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650 
1651       /* update rhs: bb1 = bb - B*x */
1652       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1653       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1654 
1655       /* local sweep */
1656       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1657     }
1658   }  else if (flag & SOR_EISENSTAT) {
1659     Vec         xx1;
1660 
1661     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1662     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1663 
1664     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     if (!mat->diag) {
1667       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1668       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1669     }
1670     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1671     if (hasop) {
1672       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1673     } else {
1674       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1675     }
1676     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1677 
1678     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1679 
1680     /* local sweep */
1681     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1682     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1683     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1684   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1685 
1686   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatPermute_MPIAIJ"
1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1693 {
1694   MPI_Comm       comm;
1695   PetscInt       first,local_rowsize,local_colsize;
1696   const PetscInt *rows;
1697   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1698   PetscErrorCode ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1702   /* make a collective version of 'rowp'; this tolerates users who pass serial index sets */
1703   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1708   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1709   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1710   /* get the local target indices */
1711   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1712   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1713   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1714   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1715   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1716   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1717   /* the column permutation is so much easier;
1718      make a local version of 'colp' and invert it */
1719   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1720   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1721   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1722   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1723   /* now we just get the submatrix */
1724   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1725   /* clean up */
1726   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1727   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1734 {
1735   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1736   Mat            A = mat->A,B = mat->B;
1737   PetscErrorCode ierr;
1738   PetscReal      isend[5],irecv[5];
1739 
1740   PetscFunctionBegin;
1741   info->block_size     = 1.0;
1742   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1743   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1744   isend[3] = info->memory;  isend[4] = info->mallocs;
1745   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1746   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1747   isend[3] += info->memory;  isend[4] += info->mallocs;
1748   if (flag == MAT_LOCAL) {
1749     info->nz_used      = isend[0];
1750     info->nz_allocated = isend[1];
1751     info->nz_unneeded  = isend[2];
1752     info->memory       = isend[3];
1753     info->mallocs      = isend[4];
1754   } else if (flag == MAT_GLOBAL_MAX) {
1755     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1756     info->nz_used      = irecv[0];
1757     info->nz_allocated = irecv[1];
1758     info->nz_unneeded  = irecv[2];
1759     info->memory       = irecv[3];
1760     info->mallocs      = irecv[4];
1761   } else if (flag == MAT_GLOBAL_SUM) {
1762     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1763     info->nz_used      = irecv[0];
1764     info->nz_allocated = irecv[1];
1765     info->nz_unneeded  = irecv[2];
1766     info->memory       = irecv[3];
1767     info->mallocs      = irecv[4];
1768   }
1769   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1770   info->fill_ratio_needed = 0;
1771   info->factor_mallocs    = 0;
1772 
1773   PetscFunctionReturn(0);
1774 }
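/*
   A minimal, hedged usage sketch:

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/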
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatSetOption_MPIAIJ"
1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1779 {
1780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1781   PetscErrorCode ierr;
1782 
1783   PetscFunctionBegin;
1784   switch (op) {
1785   case MAT_NEW_NONZERO_LOCATIONS:
1786   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1787   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1788   case MAT_KEEP_NONZERO_PATTERN:
1789   case MAT_NEW_NONZERO_LOCATION_ERR:
1790   case MAT_USE_INODES:
1791   case MAT_IGNORE_ZERO_ENTRIES:
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_ROW_ORIENTED:
1796     a->roworiented = flg;
1797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1798     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1799     break;
1800   case MAT_NEW_DIAGONALS:
1801     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1802     break;
1803   case MAT_IGNORE_OFF_PROC_ENTRIES:
1804     a->donotstash = flg;
1805     break;
1806   case MAT_SPD:
1807     A->spd_set                         = PETSC_TRUE;
1808     A->spd                             = flg;
1809     if (flg) {
1810       A->symmetric                     = PETSC_TRUE;
1811       A->structurally_symmetric        = PETSC_TRUE;
1812       A->symmetric_set                 = PETSC_TRUE;
1813       A->structurally_symmetric_set    = PETSC_TRUE;
1814     }
1815     break;
1816   case MAT_SYMMETRIC:
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_STRUCTURALLY_SYMMETRIC:
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_HERMITIAN:
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_SYMMETRY_ETERNAL:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   default:
1829     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1830   }
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatGetRow_MPIAIJ"
1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1837 {
1838   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1839   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1840   PetscErrorCode ierr;
1841   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1842   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1843   PetscInt       *cmap,*idx_p;
1844 
1845   PetscFunctionBegin;
1846   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1847   mat->getrowactive = PETSC_TRUE;
1848 
1849   if (!mat->rowvalues && (idx || v)) {
1850     /*
1851         allocate enough space to hold information from the longest row.
1852     */
1853     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1854     PetscInt   max = 1,tmp;
1855     for (i=0; i<matin->rmap->n; i++) {
1856       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1857       if (max < tmp) { max = tmp; }
1858     }
1859     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1860   }
1861 
1862   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1863   lrow = row - rstart;
1864 
1865   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1866   if (!v)   {pvA = 0; pvB = 0;}
1867   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1868   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1869   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1870   nztot = nzA + nzB;
1871 
1872   cmap  = mat->garray;
1873   if (v  || idx) {
1874     if (nztot) {
1875       /* Sort by increasing column numbers, assuming A and B already sorted */
1876       PetscInt imark = -1;
1877       if (v) {
1878         *v = v_p = mat->rowvalues;
1879         for (i=0; i<nzB; i++) {
1880           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1881           else break;
1882         }
1883         imark = i;
1884         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1885         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1886       }
1887       if (idx) {
1888         *idx = idx_p = mat->rowindices;
1889         if (imark > -1) {
1890           for (i=0; i<imark; i++) {
1891             idx_p[i] = cmap[cworkB[i]];
1892           }
1893         } else {
1894           for (i=0; i<nzB; i++) {
1895             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1896             else break;
1897           }
1898           imark = i;
1899         }
1900         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1901         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1902       }
1903     } else {
1904       if (idx) *idx = 0;
1905       if (v)   *v   = 0;
1906     }
1907   }
1908   *nz = nztot;
1909   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1910   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1917 {
1918   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1919 
1920   PetscFunctionBegin;
1921   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1922   aij->getrowactive = PETSC_FALSE;
1923   PetscFunctionReturn(0);
1924 }
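/*
   A minimal, hedged usage sketch: only locally owned rows may be requested, and every
   MatGetRow() must be matched by MatRestoreRow() before asking for another row.

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          row,ncols,rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use ncols, cols[], vals[] ... */
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/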
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatNorm_MPIAIJ"
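/*
   The branches below compute (with a_ij the global entries):
     NORM_FROBENIUS: ||A||_F   = sqrt(sum_ij |a_ij|^2)   -- local sums of squares, MPI_Allreduce with MPIU_SUM
     NORM_1:         ||A||_1   = max_j sum_i |a_ij|      -- per-column sums reduced with MPIU_SUM, then a max
     NORM_INFINITY:  ||A||_inf = max_i sum_j |a_ij|      -- local row sums, MPI_Allreduce with MPIU_MAX
*/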
1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1929 {
1930   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1931   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1932   PetscErrorCode ierr;
1933   PetscInt       i,j,cstart = mat->cmap->rstart;
1934   PetscReal      sum = 0.0;
1935   MatScalar      *v;
1936 
1937   PetscFunctionBegin;
1938   if (aij->size == 1) {
1939     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1940   } else {
1941     if (type == NORM_FROBENIUS) {
1942       v = amat->a;
1943       for (i=0; i<amat->nz; i++) {
1944 #if defined(PETSC_USE_COMPLEX)
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946 #else
1947         sum += (*v)*(*v); v++;
1948 #endif
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952 #if defined(PETSC_USE_COMPLEX)
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954 #else
1955         sum += (*v)*(*v); v++;
1956 #endif
1957       }
1958       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960     } else if (type == NORM_1) { /* max column norm */
1961       PetscReal *tmp,*tmp2;
1962       PetscInt  *jj,*garray = aij->garray;
1963       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1964       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1965       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981     } else if (type == NORM_INFINITY) { /* max row norm */
1982       PetscReal ntemp = 0.0;
1983       for (j=0; j<aij->A->rmap->n; j++) {
1984         v = amat->a + amat->i[j];
1985         sum = 0.0;
1986         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         v = bmat->a + bmat->i[j];
1990         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1991           sum += PetscAbsScalar(*v); v++;
1992         }
1993         if (sum > ntemp) ntemp = sum;
1994       }
1995       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1996     } else {
1997       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1998     }
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatTranspose_MPIAIJ"
2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2006 {
2007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2008   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2009   PetscErrorCode ierr;
2010   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2011   PetscInt       cstart=A->cmap->rstart,ncol;
2012   Mat            B;
2013   MatScalar      *array;
2014 
2015   PetscFunctionBegin;
2016   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2017 
2018   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2019   ai = Aloc->i; aj = Aloc->j;
2020   bi = Bloc->i; bj = Bloc->j;
2021   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2022     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2023     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2024     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2025     for (i=0; i<ai[ma]; i++){
2026       d_nnz[aj[i]] ++;
2027       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2028     }
2029 
2030     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2031     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2032     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2033     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2034     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2035     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2036   } else {
2037     B = *matout;
2038   }
2039 
2040   /* copy over the A part */
2041   array = Aloc->a;
2042   row = A->rmap->rstart;
2043   for (i=0; i<ma; i++) {
2044     ncol = ai[i+1]-ai[i];
2045     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2046     row++; array += ncol; aj += ncol;
2047   }
2048   aj = Aloc->j;
2049   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2050 
2051   /* copy over the B part */
2052   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2053   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2054   array = Bloc->a;
2055   row = A->rmap->rstart;
2056   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2057   cols_tmp = cols;
2058   for (i=0; i<mb; i++) {
2059     ncol = bi[i+1]-bi[i];
2060     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2061     row++; array += ncol; cols_tmp += ncol;
2062   }
2063   ierr = PetscFree(cols);CHKERRQ(ierr);
2064 
2065   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2067   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2068     *matout = B;
2069   } else {
2070     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
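/*
   A minimal, hedged usage sketch:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);  /* new matrix holding A^T */
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     /* in-place; square matrices only, per the check above */
*/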
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2078 {
2079   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2080   Mat            a = aij->A,b = aij->B;
2081   PetscErrorCode ierr;
2082   PetscInt       s1,s2,s3;
2083 
2084   PetscFunctionBegin;
2085   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2086   if (rr) {
2087     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2088     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2089     /* Overlap communication with computation. */
2090     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2091   }
2092   if (ll) {
2093     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2094     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2095     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2096   }
2097   /* scale  the diagonal block */
2098   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2099 
2100   if (rr) {
2101     /* Do a scatter end and then right scale the off-diagonal block */
2102     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2104   }
2105 
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 #undef __FUNCT__
2110 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2111 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2112 {
2113   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatEqual_MPIAIJ"
2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2124 {
2125   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2126   Mat            a,b,c,d;
2127   PetscBool      flg;
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   a = matA->A; b = matA->B;
2132   c = matB->A; d = matB->B;
2133 
2134   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2135   if (flg) {
2136     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2137   }
2138   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatCopy_MPIAIJ"
2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2145 {
2146   PetscErrorCode ierr;
2147   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2148   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2149 
2150   PetscFunctionBegin;
2151   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2152   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2153     /* because of the column compression in the off-processor part of the matrix a->B,
2154        the number of columns in a->B and b->B may be different; hence we cannot call
2155        MatCopy() directly on the two parts. If need be, a copy more efficient than
2156        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2157        then copying the submatrices */
2158     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2159   } else {
2160     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2161     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatSetUp_MPIAIJ"
2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2169 {
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatAXPY_MPIAIJ"
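/*
   Computes Y <- Y + a*X.  With SAME_NONZERO_PATTERN the value arrays of matching blocks line
   up entry-for-entry, so one BLAS axpy per block suffices; with SUBSET_NONZERO_PATTERN an
   x-to-y index map is built (and cached) for the off-diagonal block; otherwise a matrix with
   the union nonzero pattern is preallocated and Y's header is replaced by it.
*/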
2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2180 {
2181   PetscErrorCode ierr;
2182   PetscInt       i;
2183   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2184   PetscBLASInt   bnz,one=1;
2185   Mat_SeqAIJ     *x,*y;
2186 
2187   PetscFunctionBegin;
2188   if (str == SAME_NONZERO_PATTERN) {
2189     PetscScalar alpha = a;
2190     x = (Mat_SeqAIJ *)xx->A->data;
2191     y = (Mat_SeqAIJ *)yy->A->data;
2192     bnz = PetscBLASIntCast(x->nz);
2193     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2194     x = (Mat_SeqAIJ *)xx->B->data;
2195     y = (Mat_SeqAIJ *)yy->B->data;
2196     bnz = PetscBLASIntCast(x->nz);
2197     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2198   } else if (str == SUBSET_NONZERO_PATTERN) {
2199     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2200 
2201     x = (Mat_SeqAIJ *)xx->B->data;
2202     y = (Mat_SeqAIJ *)yy->B->data;
2203     if (y->xtoy && y->XtoY != xx->B) {
2204       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2205       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2206     }
2207     if (!y->xtoy) { /* get xtoy */
2208       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2209       y->XtoY = xx->B;
2210       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2211     }
2212     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2213   } else {
2214     Mat B;
2215     PetscInt *nnz_d,*nnz_o;
2216     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2217     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2218     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2219     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2220     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2221     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2222     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2223     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2224     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2225     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2226     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2227     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2228     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatConjugate_MPIAIJ"
2237 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2238 {
2239 #if defined(PETSC_USE_COMPLEX)
2240   PetscErrorCode ierr;
2241   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2242 
2243   PetscFunctionBegin;
2244   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2245   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2246 #else
2247   PetscFunctionBegin;
2248 #endif
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatRealPart_MPIAIJ"
2254 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2255 {
2256   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2257   PetscErrorCode ierr;
2258 
2259   PetscFunctionBegin;
2260   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2261   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2267 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2268 {
2269   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2270   PetscErrorCode ierr;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2274   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #ifdef PETSC_HAVE_PBGL
2279 
2280 #include <boost/parallel/mpi/bsp_process_group.hpp>
2281 #include <boost/graph/distributed/ilu_default_graph.hpp>
2282 #include <boost/graph/distributed/ilu_0_block.hpp>
2283 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2284 #include <boost/graph/distributed/petsc/interface.hpp>
2285 #include <boost/multi_array.hpp>
2286 #include <boost/parallel/distributed_property_map.hpp>
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2290 /*
2291   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2292 */
2293 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2294 {
2295   namespace petsc = boost::distributed::petsc;
2296 
2297   namespace graph_dist = boost::graph::distributed;
2298   using boost::graph::distributed::ilu_default::process_group_type;
2299   using boost::graph::ilu_permuted;
2300 
2301   PetscBool       row_identity, col_identity;
2302   PetscContainer  c;
2303   PetscInt        m, n, M, N;
2304   PetscErrorCode  ierr;
2305 
2306   PetscFunctionBegin;
2307   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2308   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2309   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2310   if (!row_identity || !col_identity) {
2311     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2312   }
2313 
2314   process_group_type pg;
2315   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2316   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2317   lgraph_type&   level_graph = *lgraph_p;
2318   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2319 
2320   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2321   ilu_permuted(level_graph);
2322 
2323   /* put together the new matrix */
2324   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2325   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2326   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2327   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2328   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2329   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2330   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331 
2332   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2333   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2334   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2335   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2341 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2342 {
2343   PetscFunctionBegin;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatSolve_MPIAIJ"
2349 /*
2350   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2351 */
2352 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2353 {
2354   namespace graph_dist = boost::graph::distributed;
2355 
2356   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2357   lgraph_type*   lgraph_p;
2358   PetscContainer c;
2359   PetscErrorCode ierr;
2360 
2361   PetscFunctionBegin;
2362   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2363   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2364   ierr = VecCopy(b, x);CHKERRQ(ierr);
2365 
2366   PetscScalar* array_x;
2367   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2368   PetscInt sx;
2369   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2370 
2371   PetscScalar* array_b;
2372   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2373   PetscInt sb;
2374   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2375 
2376   lgraph_type&   level_graph = *lgraph_p;
2377   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2378 
2379   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2380   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2381                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2382 
2383   typedef boost::iterator_property_map<array_ref_type::iterator,
2384                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2385   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2386                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2387 
2388   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2389 
2390   PetscFunctionReturn(0);
2391 }
2392 #endif
2393 
2394 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2395   PetscInt       nzlocal,nsends,nrecvs;
2396   PetscMPIInt    *send_rank,*recv_rank;
2397   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2398   PetscScalar    *sbuf_a,**rbuf_a;
2399   PetscErrorCode (*Destroy)(Mat);
2400 } Mat_Redundant;
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2404 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2405 {
2406   PetscErrorCode       ierr;
2407   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2408   PetscInt             i;
2409 
2410   PetscFunctionBegin;
2411   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2412   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2413   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2414   for (i=0; i<redund->nrecvs; i++){
2415     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2416     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2417   }
2418   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2419   ierr = PetscFree(redund);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatDestroy_MatRedundant"
2425 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2426 {
2427   PetscErrorCode  ierr;
2428   PetscContainer  container;
2429   Mat_Redundant   *redund=PETSC_NULL;
2430 
2431   PetscFunctionBegin;
2432   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2433   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2434   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2435   A->ops->destroy = redund->Destroy;
2436   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2437   if (A->ops->destroy) {
2438     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2439   }
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
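/*
   Rough outline (hedged): each process packs its local rows in CSR form (sbuf_j holds the row
   pointers followed by the column indices, sbuf_a the values) and exchanges them with its
   counterpart processes in the other subcommunicators, so that every subcommunicator ends up
   holding all rows of the original matrix; on reuse only the values (sbuf_a) are re-sent.
*/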
2445 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2446 {
2447   PetscMPIInt    rank,size;
2448   MPI_Comm       comm=((PetscObject)mat)->comm;
2449   PetscErrorCode ierr;
2450   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2451   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2452   PetscInt       *rowrange=mat->rmap->range;
2453   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2454   Mat            A=aij->A,B=aij->B,C=*matredundant;
2455   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2456   PetscScalar    *sbuf_a;
2457   PetscInt       nzlocal=a->nz+b->nz;
2458   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2459   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2460   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2461   MatScalar      *aworkA,*aworkB;
2462   PetscScalar    *vals;
2463   PetscMPIInt    tag1,tag2,tag3,imdex;
2464   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2465                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2466   MPI_Status     recv_status,*send_status;
2467   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2468   PetscInt       **rbuf_j=PETSC_NULL;
2469   PetscScalar    **rbuf_a=PETSC_NULL;
2470   Mat_Redundant  *redund=PETSC_NULL;
2471   PetscContainer container;
2472 
2473   PetscFunctionBegin;
2474   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2476 
2477   if (reuse == MAT_REUSE_MATRIX) {
2478     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2479     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2480     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2481     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2482     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2483     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2484     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2485     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2486 
2487     nsends    = redund->nsends;
2488     nrecvs    = redund->nrecvs;
2489     send_rank = redund->send_rank;
2490     recv_rank = redund->recv_rank;
2491     sbuf_nz   = redund->sbuf_nz;
2492     rbuf_nz   = redund->rbuf_nz;
2493     sbuf_j    = redund->sbuf_j;
2494     sbuf_a    = redund->sbuf_a;
2495     rbuf_j    = redund->rbuf_j;
2496     rbuf_a    = redund->rbuf_a;
2497   }
2498 
2499   if (reuse == MAT_INITIAL_MATRIX){
2500     PetscMPIInt  subrank,subsize;
2501     PetscInt     nleftover,np_subcomm;
2502     /* get the destination processors' id send_rank, nsends and nrecvs */
2503     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2504     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2505     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2506     np_subcomm = size/nsubcomm;
2507     nleftover  = size - nsubcomm*np_subcomm;
2508     nsends = 0; nrecvs = 0;
2509     for (i=0; i<size; i++){ /* i=rank*/
2510       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2511         send_rank[nsends] = i; nsends++;
2512         recv_rank[nrecvs++] = i;
2513       }
2514     }
2515     if (rank >= size - nleftover){/* this proc is a leftover processor */
2516       i = size-nleftover-1;
2517       j = 0;
2518       while (j < nsubcomm - nleftover){
2519         send_rank[nsends++] = i;
2520         i--; j++;
2521       }
2522     }
2523 
2524     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2525       for (i=0; i<nleftover; i++){
2526         recv_rank[nrecvs++] = size-nleftover+i;
2527       }
2528     }
2529 
2530     /* allocate sbuf_j, sbuf_a */
2531     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2532     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2533     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2534   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2535 
2536   /* copy mat's local entries into the buffers */
2537   if (reuse == MAT_INITIAL_MATRIX){
2538     rownz_max = 0;
2539     rptr = sbuf_j;
2540     cols = sbuf_j + rend-rstart + 1;
2541     vals = sbuf_a;
2542     rptr[0] = 0;
2543     for (i=0; i<rend-rstart; i++){
2544       row = i + rstart;
2545       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2546       ncols  = nzA + nzB;
2547       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2548       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2549       /* load the column indices for this row into cols */
2550       lwrite = 0;
2551       for (l=0; l<nzB; l++) {
2552         if ((ctmp = bmap[cworkB[l]]) < cstart){
2553           vals[lwrite]   = aworkB[l];
2554           cols[lwrite++] = ctmp;
2555         }
2556       }
2557       for (l=0; l<nzA; l++){
2558         vals[lwrite]   = aworkA[l];
2559         cols[lwrite++] = cstart + cworkA[l];
2560       }
2561       for (l=0; l<nzB; l++) {
2562         if ((ctmp = bmap[cworkB[l]]) >= cend){
2563           vals[lwrite]   = aworkB[l];
2564           cols[lwrite++] = ctmp;
2565         }
2566       }
2567       vals += ncols;
2568       cols += ncols;
2569       rptr[i+1] = rptr[i] + ncols;
2570       if (rownz_max < ncols) rownz_max = ncols;
2571     }
2572     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2573   } else { /* only copy matrix values into sbuf_a */
2574     rptr = sbuf_j;
2575     vals = sbuf_a;
2576     rptr[0] = 0;
2577     for (i=0; i<rend-rstart; i++){
2578       row = i + rstart;
2579       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2580       ncols  = nzA + nzB;
2581       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2582       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2583       lwrite = 0;
2584       for (l=0; l<nzB; l++) {
2585         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2586       }
2587       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2588       for (l=0; l<nzB; l++) {
2589         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2590       }
2591       vals += ncols;
2592       rptr[i+1] = rptr[i] + ncols;
2593     }
2594   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2595 
2596   /* send nzlocal to others, and recv other's nzlocal */
2597   /*--------------------------------------------------*/
2598   if (reuse == MAT_INITIAL_MATRIX){
2599     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2600     s_waits2 = s_waits3 + nsends;
2601     s_waits1 = s_waits2 + nsends;
2602     r_waits1 = s_waits1 + nsends;
2603     r_waits2 = r_waits1 + nrecvs;
2604     r_waits3 = r_waits2 + nrecvs;
2605   } else {
2606     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2607     r_waits3 = s_waits3 + nsends;
2608   }
2609 
2610   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2611   if (reuse == MAT_INITIAL_MATRIX){
2612     /* get new tags to keep the communication clean */
2613     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2614     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2615     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2616 
2617     /* post receives of other's nzlocal */
2618     for (i=0; i<nrecvs; i++){
2619       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2620     }
2621     /* send nzlocal to others */
2622     for (i=0; i<nsends; i++){
2623       sbuf_nz[i] = nzlocal;
2624       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2625     }
2626     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2627     count = nrecvs;
2628     while (count) {
2629       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2630       recv_rank[imdex] = recv_status.MPI_SOURCE;
2631       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2632       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2633 
2634       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2635       rbuf_nz[imdex] += i + 2;
2636       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2637       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2638       count--;
2639     }
2640     /* wait on sends of nzlocal */
2641     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2642     /* send mat->i,j to others, and recv from other's */
2643     /*------------------------------------------------*/
2644     for (i=0; i<nsends; i++){
2645       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2646       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2647     }
2648     /* wait on receives of mat->i,j */
2649     /*------------------------------*/
2650     count = nrecvs;
2651     while (count) {
2652       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2653       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2654       count--;
2655     }
2656     /* wait on sends of mat->i,j */
2657     /*---------------------------*/
2658     if (nsends) {
2659       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2660     }
2661   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2662 
2663   /* post receives, send and receive mat->a */
2664   /*----------------------------------------*/
2665   for (imdex=0; imdex<nrecvs; imdex++) {
2666     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2667   }
2668   for (i=0; i<nsends; i++){
2669     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2670   }
2671   count = nrecvs;
2672   while (count) {
2673     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2674     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2675     count--;
2676   }
2677   if (nsends) {
2678     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2679   }
2680 
2681   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2682 
2683   /* create redundant matrix */
2684   /*-------------------------*/
2685   if (reuse == MAT_INITIAL_MATRIX){
2686     /* compute rownz_max for preallocation */
2687     for (imdex=0; imdex<nrecvs; imdex++){
2688       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2689       rptr = rbuf_j[imdex];
2690       for (i=0; i<j; i++){
2691         ncols = rptr[i+1] - rptr[i];
2692         if (rownz_max < ncols) rownz_max = ncols;
2693       }
2694     }
2695 
2696     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2697     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2698     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2699     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2700     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2701   } else {
2702     C = *matredundant;
2703   }
2704 
2705   /* insert local matrix entries */
2706   rptr = sbuf_j;
2707   cols = sbuf_j + rend-rstart + 1;
2708   vals = sbuf_a;
2709   for (i=0; i<rend-rstart; i++){
2710     row   = i + rstart;
2711     ncols = rptr[i+1] - rptr[i];
2712     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2713     vals += ncols;
2714     cols += ncols;
2715   }
2716   /* insert received matrix entries */
2717   for (imdex=0; imdex<nrecvs; imdex++){
2718     rstart = rowrange[recv_rank[imdex]];
2719     rend   = rowrange[recv_rank[imdex]+1];
2720     rptr = rbuf_j[imdex];
2721     cols = rbuf_j[imdex] + rend-rstart + 1;
2722     vals = rbuf_a[imdex];
2723     for (i=0; i<rend-rstart; i++){
2724       row   = i + rstart;
2725       ncols = rptr[i+1] - rptr[i];
2726       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2727       vals += ncols;
2728       cols += ncols;
2729     }
2730   }
2731   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2732   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2733   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2734   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2735   if (reuse == MAT_INITIAL_MATRIX) {
2736     PetscContainer container;
2737     *matredundant = C;
2738     /* create a supporting struct and attach it to C for reuse */
2739     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2740     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2741     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2742     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2743     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2744     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2745 
2746     redund->nzlocal = nzlocal;
2747     redund->nsends  = nsends;
2748     redund->nrecvs  = nrecvs;
2749     redund->send_rank = send_rank;
2750     redund->recv_rank = recv_rank;
2751     redund->sbuf_nz = sbuf_nz;
2752     redund->rbuf_nz = rbuf_nz;
2753     redund->sbuf_j  = sbuf_j;
2754     redund->sbuf_a  = sbuf_a;
2755     redund->rbuf_j  = rbuf_j;
2756     redund->rbuf_a  = rbuf_a;
2757 
2758     redund->Destroy = C->ops->destroy;
2759     C->ops->destroy = MatDestroy_MatRedundant;
2760   }
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2766 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2767 {
2768   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2769   PetscErrorCode ierr;
2770   PetscInt       i,*idxb = 0;
2771   PetscScalar    *va,*vb;
2772   Vec            vtmp;
2773 
2774   PetscFunctionBegin;
2775   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2776   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2777   if (idx) {
2778     for (i=0; i<A->rmap->n; i++) {
2779       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2780     }
2781   }
2782 
2783   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2784   if (idx) {
2785     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2786   }
2787   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2788   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2789 
2790   for (i=0; i<A->rmap->n; i++){
2791     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2792       va[i] = vb[i];
2793       if (idx) idx[i] = a->garray[idxb[i]];
2794     }
2795   }
2796 
2797   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2798   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2799   ierr = PetscFree(idxb);CHKERRQ(ierr);
2800   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2806 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2807 {
2808   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2809   PetscErrorCode ierr;
2810   PetscInt       i,*idxb = 0;
2811   PetscScalar    *va,*vb;
2812   Vec            vtmp;
2813 
2814   PetscFunctionBegin;
2815   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2816   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2817   if (idx) {
2818     for (i=0; i<A->rmap->n; i++) {
2819       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2820     }
2821   }
2822 
2823   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2824   if (idx) {
2825     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2826   }
2827   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2828   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2829 
2830   for (i=0; i<A->rmap->n; i++){
2831     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2832       va[i] = vb[i];
2833       if (idx) idx[i] = a->garray[idxb[i]];
2834     }
2835   }
2836 
2837   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2838   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2839   ierr = PetscFree(idxb);CHKERRQ(ierr);
2840   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 #undef __FUNCT__
2845 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2846 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2847 {
2848   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2849   PetscInt       n      = A->rmap->n;
2850   PetscInt       cstart = A->cmap->rstart;
2851   PetscInt      *cmap   = mat->garray;
2852   PetscInt      *diagIdx, *offdiagIdx;
2853   Vec            diagV, offdiagV;
2854   PetscScalar   *a, *diagA, *offdiagA;
2855   PetscInt       r;
2856   PetscErrorCode ierr;
2857 
2858   PetscFunctionBegin;
2859   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2860   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2861   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2862   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2863   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2864   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2865   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2866   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2867   for(r = 0; r < n; ++r) {
2868     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2869       a[r]   = diagA[r];
2870       idx[r] = cstart + diagIdx[r];
2871     } else {
2872       a[r]   = offdiagA[r];
2873       idx[r] = cmap[offdiagIdx[r]];
2874     }
2875   }
2876   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2877   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2878   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2879   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2880   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2881   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2887 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2888 {
2889   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2890   PetscInt       n      = A->rmap->n;
2891   PetscInt       cstart = A->cmap->rstart;
2892   PetscInt      *cmap   = mat->garray;
2893   PetscInt      *diagIdx, *offdiagIdx;
2894   Vec            diagV, offdiagV;
2895   PetscScalar   *a, *diagA, *offdiagA;
2896   PetscInt       r;
2897   PetscErrorCode ierr;
2898 
2899   PetscFunctionBegin;
2900   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2901   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2902   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2903   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2904   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2905   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2906   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2907   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2908   for(r = 0; r < n; ++r) {
2909     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2910       a[r]   = diagA[r];
2911       idx[r] = cstart + diagIdx[r];
2912     } else {
2913       a[r]   = offdiagA[r];
2914       idx[r] = cmap[offdiagIdx[r]];
2915     }
2916   }
2917   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2918   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2919   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2920   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2921   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2922   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2928 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2929 {
2930   PetscErrorCode ierr;
2931   Mat            *dummy;
2932 
2933   PetscFunctionBegin;
2934   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2935   *newmat = *dummy;
2936   ierr = PetscFree(dummy);CHKERRQ(ierr);
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2941 
2942 #undef __FUNCT__
2943 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2944 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2945 {
2946   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 
2955 /* -------------------------------------------------------------------*/
2956 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2957        MatGetRow_MPIAIJ,
2958        MatRestoreRow_MPIAIJ,
2959        MatMult_MPIAIJ,
2960 /* 4*/ MatMultAdd_MPIAIJ,
2961        MatMultTranspose_MPIAIJ,
2962        MatMultTransposeAdd_MPIAIJ,
2963 #ifdef PETSC_HAVE_PBGL
2964        MatSolve_MPIAIJ,
2965 #else
2966        0,
2967 #endif
2968        0,
2969        0,
2970 /*10*/ 0,
2971        0,
2972        0,
2973        MatSOR_MPIAIJ,
2974        MatTranspose_MPIAIJ,
2975 /*15*/ MatGetInfo_MPIAIJ,
2976        MatEqual_MPIAIJ,
2977        MatGetDiagonal_MPIAIJ,
2978        MatDiagonalScale_MPIAIJ,
2979        MatNorm_MPIAIJ,
2980 /*20*/ MatAssemblyBegin_MPIAIJ,
2981        MatAssemblyEnd_MPIAIJ,
2982        MatSetOption_MPIAIJ,
2983        MatZeroEntries_MPIAIJ,
2984 /*24*/ MatZeroRows_MPIAIJ,
2985        0,
2986 #ifdef PETSC_HAVE_PBGL
2987        0,
2988 #else
2989        0,
2990 #endif
2991        0,
2992        0,
2993 /*29*/ MatSetUp_MPIAIJ,
2994 #ifdef PETSC_HAVE_PBGL
2995        0,
2996 #else
2997        0,
2998 #endif
2999        0,
3000        0,
3001        0,
3002 /*34*/ MatDuplicate_MPIAIJ,
3003        0,
3004        0,
3005        0,
3006        0,
3007 /*39*/ MatAXPY_MPIAIJ,
3008        MatGetSubMatrices_MPIAIJ,
3009        MatIncreaseOverlap_MPIAIJ,
3010        MatGetValues_MPIAIJ,
3011        MatCopy_MPIAIJ,
3012 /*44*/ MatGetRowMax_MPIAIJ,
3013        MatScale_MPIAIJ,
3014        0,
3015        0,
3016        MatZeroRowsColumns_MPIAIJ,
3017 /*49*/ 0,
3018        0,
3019        0,
3020        0,
3021        0,
3022 /*54*/ MatFDColoringCreate_MPIAIJ,
3023        0,
3024        MatSetUnfactored_MPIAIJ,
3025        MatPermute_MPIAIJ,
3026        0,
3027 /*59*/ MatGetSubMatrix_MPIAIJ,
3028        MatDestroy_MPIAIJ,
3029        MatView_MPIAIJ,
3030        0,
3031        0,
3032 /*64*/ 0,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3038        MatGetRowMinAbs_MPIAIJ,
3039        0,
3040        MatSetColoring_MPIAIJ,
3041 #if defined(PETSC_HAVE_ADIC)
3042        MatSetValuesAdic_MPIAIJ,
3043 #else
3044        0,
3045 #endif
3046        MatSetValuesAdifor_MPIAIJ,
3047 /*75*/ MatFDColoringApply_AIJ,
3048        0,
3049        0,
3050        0,
3051        0,
3052 /*80*/ 0,
3053        0,
3054        0,
3055 /*83*/ MatLoad_MPIAIJ,
3056        0,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3062        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3063        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3064        MatPtAP_Basic,
3065        MatPtAPSymbolic_MPIAIJ,
3066 /*94*/ MatPtAPNumeric_MPIAIJ,
3067        0,
3068        0,
3069        0,
3070        0,
3071 /*99*/ 0,
3072        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3073        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3074        MatConjugate_MPIAIJ,
3075        0,
3076 /*104*/MatSetValuesRow_MPIAIJ,
3077        MatRealPart_MPIAIJ,
3078        MatImaginaryPart_MPIAIJ,
3079        0,
3080        0,
3081 /*109*/0,
3082        MatGetRedundantMatrix_MPIAIJ,
3083        MatGetRowMin_MPIAIJ,
3084        0,
3085        0,
3086 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3087        0,
3088        0,
3089        0,
3090        0,
3091 /*119*/0,
3092        0,
3093        0,
3094        0,
3095        MatGetMultiProcBlock_MPIAIJ,
3096 /*124*/MatFindNonZeroRows_MPIAIJ,
3097        MatGetColumnNorms_MPIAIJ,
3098        MatInvertBlockDiagonal_MPIAIJ,
3099        0,
3100        MatGetSubMatricesParallel_MPIAIJ,
3101 /*129*/0,
3102        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3103        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3104        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3105        0,
3106 /*134*/0,
3107        0,
3108        0,
3109        0,
3110        0
3111 };
3112 
3113 /* ----------------------------------------------------------------------------------------*/
3114 
3115 EXTERN_C_BEGIN
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3118 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3119 {
3120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3121   PetscErrorCode ierr;
3122 
3123   PetscFunctionBegin;
3124   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3125   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 EXTERN_C_END
3129 
3130 EXTERN_C_BEGIN
3131 #undef __FUNCT__
3132 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3133 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3134 {
3135   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3136   PetscErrorCode ierr;
3137 
3138   PetscFunctionBegin;
3139   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3140   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
3143 EXTERN_C_END
3144 
3145 EXTERN_C_BEGIN
3146 #undef __FUNCT__
3147 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3148 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3149 {
3150   Mat_MPIAIJ     *b;
3151   PetscErrorCode ierr;
3152   PetscInt       i;
3153   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3154 
3155   PetscFunctionBegin;
3156   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3157   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3158   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3159   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3160   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3161   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3162 
3163   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3164   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3165   if (d_nnz) {
3166     for (i=0; i<B->rmap->n; i++) {
3167       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3168     }
3169   }
3170   if (o_nnz) {
3171     for (i=0; i<B->rmap->n; i++) {
3172       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3173     }
3174   }
3175   b = (Mat_MPIAIJ*)B->data;
3176 
3177   if (!B->preallocated) {
3178     /* Explicitly create 2 MATSEQAIJ matrices. */
3179     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3180     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3181     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3182     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3183     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3184     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3185     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3186     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3187   }
3188 
3189   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3190   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3191   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3192   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3193   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3194   B->preallocated = PETSC_TRUE;
3195   PetscFunctionReturn(0);
3196 }
3197 EXTERN_C_END
3198 
3199 #undef __FUNCT__
3200 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3201 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3202 {
3203   Mat            mat;
3204   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3205   PetscErrorCode ierr;
3206 
3207   PetscFunctionBegin;
3208   *newmat       = 0;
3209   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3210   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3211   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3212   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3213   a    = (Mat_MPIAIJ*)mat->data;
3214 
3215   mat->factortype    = matin->factortype;
3216   mat->rmap->bs      = matin->rmap->bs;
3217   mat->assembled    = PETSC_TRUE;
3218   mat->insertmode   = NOT_SET_VALUES;
3219   mat->preallocated = PETSC_TRUE;
3220 
3221   a->size           = oldmat->size;
3222   a->rank           = oldmat->rank;
3223   a->donotstash     = oldmat->donotstash;
3224   a->roworiented    = oldmat->roworiented;
3225   a->rowindices     = 0;
3226   a->rowvalues      = 0;
3227   a->getrowactive   = PETSC_FALSE;
3228 
3229   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3230   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3231 
3232   if (oldmat->colmap) {
3233 #if defined (PETSC_USE_CTABLE)
3234     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3235 #else
3236     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3237     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3238     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3239 #endif
3240   } else a->colmap = 0;
3241   if (oldmat->garray) {
3242     PetscInt len;
3243     len  = oldmat->B->cmap->n;
3244     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3245     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3246     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3247   } else a->garray = 0;
3248 
3249   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3250   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3251   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3252   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3253   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3254   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3255   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3256   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3257   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3258   *newmat = mat;
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 
3263 
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatLoad_MPIAIJ"
3266 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3267 {
3268   PetscScalar    *vals,*svals;
3269   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3270   PetscErrorCode ierr;
3271   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3272   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3273   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3274   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3275   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3276   int            fd;
3277 
3278   PetscFunctionBegin;
3279   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3280   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3281   if (!rank) {
3282     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3283     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3284     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3285   }
3286 
3287   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3288 
3289   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3290   M = header[1]; N = header[2];
3291   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3292   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3293   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3294 
3295   /* If global sizes are set, check if they are consistent with that given in the file */
3296   if (sizesset) {
3297     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3298   }
3299   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3300   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3301 
3302   /* determine ownership of all rows */
3303   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3304   else m = newMat->rmap->n; /* Set by user */
3305 
3306   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3307   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3308 
3309   /* First process needs enough room for process with most rows */
3310   if (!rank) {
3311     mmax       = rowners[1];
3312     for (i=2; i<=size; i++) {
3313       mmax = PetscMax(mmax,rowners[i]);
3314     }
3315   } else mmax = m;
3316 
3317   rowners[0] = 0;
3318   for (i=2; i<=size; i++) {
3319     rowners[i] += rowners[i-1];
3320   }
3321   rstart = rowners[rank];
3322   rend   = rowners[rank+1];
3323 
3324   /* distribute row lengths to all processors */
3325   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3326   if (!rank) {
3327     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3328     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3329     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3330     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3331     for (j=0; j<m; j++) {
3332       procsnz[0] += ourlens[j];
3333     }
3334     for (i=1; i<size; i++) {
3335       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3336       /* calculate the number of nonzeros on each processor */
3337       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3338         procsnz[i] += rowlengths[j];
3339       }
3340       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3341     }
3342     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3343   } else {
3344     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3345   }
3346 
3347   if (!rank) {
3348     /* determine max buffer needed and allocate it */
3349     maxnz = 0;
3350     for (i=0; i<size; i++) {
3351       maxnz = PetscMax(maxnz,procsnz[i]);
3352     }
3353     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3354 
3355     /* read in my part of the matrix column indices  */
3356     nz   = procsnz[0];
3357     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3358     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3359 
3360     /* read in every one elses and ship off */
3361     for (i=1; i<size; i++) {
3362       nz     = procsnz[i];
3363       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3364       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3365     }
3366     ierr = PetscFree(cols);CHKERRQ(ierr);
3367   } else {
3368     /* determine buffer space needed for message */
3369     nz = 0;
3370     for (i=0; i<m; i++) {
3371       nz += ourlens[i];
3372     }
3373     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3374 
3375     /* receive message of column indices*/
3376     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3377   }
3378 
3379   /* determine column ownership if matrix is not square */
3380   if (N != M) {
3381     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3382     else n = newMat->cmap->n;
3383     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3384     cstart = cend - n;
3385   } else {
3386     cstart = rstart;
3387     cend   = rend;
3388     n      = cend - cstart;
3389   }
3390 
3391   /* loop over local rows, determining number of off diagonal entries */
3392   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3393   jj = 0;
3394   for (i=0; i<m; i++) {
3395     for (j=0; j<ourlens[i]; j++) {
3396       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3397       jj++;
3398     }
3399   }
3400 
3401   for (i=0; i<m; i++) {
3402     ourlens[i] -= offlens[i];
3403   }
3404   if (!sizesset) {
3405     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3406   }
3407   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3408 
3409   for (i=0; i<m; i++) {
3410     ourlens[i] += offlens[i];
3411   }
3412 
3413   if (!rank) {
3414     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3415 
3416     /* read in my part of the matrix numerical values  */
3417     nz   = procsnz[0];
3418     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3419 
3420     /* insert into matrix */
3421     jj      = rstart;
3422     smycols = mycols;
3423     svals   = vals;
3424     for (i=0; i<m; i++) {
3425       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3426       smycols += ourlens[i];
3427       svals   += ourlens[i];
3428       jj++;
3429     }
3430 
3431     /* read in other processors and ship out */
3432     for (i=1; i<size; i++) {
3433       nz     = procsnz[i];
3434       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3435       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3436     }
3437     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3438   } else {
3439     /* receive numeric values */
3440     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3441 
3442     /* receive message of values*/
3443     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3444 
3445     /* insert into matrix */
3446     jj      = rstart;
3447     smycols = mycols;
3448     svals   = vals;
3449     for (i=0; i<m; i++) {
3450       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3451       smycols += ourlens[i];
3452       svals   += ourlens[i];
3453       jj++;
3454     }
3455   }
3456   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3457   ierr = PetscFree(vals);CHKERRQ(ierr);
3458   ierr = PetscFree(mycols);CHKERRQ(ierr);
3459   ierr = PetscFree(rowners);CHKERRQ(ierr);
3460 
3461   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3462   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 #undef __FUNCT__
3467 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3468 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3469 {
3470   PetscErrorCode ierr;
3471   IS             iscol_local;
3472   PetscInt       csize;
3473 
3474   PetscFunctionBegin;
3475   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3476   if (call == MAT_REUSE_MATRIX) {
3477     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3478     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3479   } else {
3480     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3481   }
3482   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3483   if (call == MAT_INITIAL_MATRIX) {
3484     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3485     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3486   }
3487   PetscFunctionReturn(0);
3488 }
3489 
3490 #undef __FUNCT__
3491 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3492 /*
3493     Not great since it makes two copies of the submatrix, first an SeqAIJ
3494   in local and then by concatenating the local matrices the end result.
3495   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3496 
3497   Note: This requires a sequential iscol with all indices.
3498 */
3499 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3500 {
3501   PetscErrorCode ierr;
3502   PetscMPIInt    rank,size;
3503   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3504   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3505   Mat            *local,M,Mreuse;
3506   MatScalar      *vwork,*aa;
3507   MPI_Comm       comm = ((PetscObject)mat)->comm;
3508   Mat_SeqAIJ     *aij;
3509 
3510 
3511   PetscFunctionBegin;
3512   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3514 
3515   if (call ==  MAT_REUSE_MATRIX) {
3516     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3517     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3518     local = &Mreuse;
3519     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3520   } else {
3521     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3522     Mreuse = *local;
3523     ierr   = PetscFree(local);CHKERRQ(ierr);
3524   }
3525 
3526   /*
3527       m - number of local rows
3528       n - number of columns (same on all processors)
3529       rstart - first row in new global matrix generated
3530   */
3531   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3532   if (call == MAT_INITIAL_MATRIX) {
3533     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3534     ii  = aij->i;
3535     jj  = aij->j;
3536 
3537     /*
3538         Determine the number of non-zeros in the diagonal and off-diagonal
3539         portions of the matrix in order to do correct preallocation
3540     */
3541 
3542     /* first get start and end of "diagonal" columns */
3543     if (csize == PETSC_DECIDE) {
3544       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3545       if (mglobal == n) { /* square matrix */
3546         nlocal = m;
3547       } else {
3548         nlocal = n/size + ((n % size) > rank);
3549       }
3550     } else {
3551       nlocal = csize;
3552     }
3553     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3554     rstart = rend - nlocal;
3555     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3556 
3557     /* next, compute all the lengths */
3558     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3559     olens = dlens + m;
3560     for (i=0; i<m; i++) {
3561       jend = ii[i+1] - ii[i];
3562       olen = 0;
3563       dlen = 0;
3564       for (j=0; j<jend; j++) {
3565         if (*jj < rstart || *jj >= rend) olen++;
3566         else dlen++;
3567         jj++;
3568       }
3569       olens[i] = olen;
3570       dlens[i] = dlen;
3571     }
3572     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3573     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3574     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3575     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3576     ierr = PetscFree(dlens);CHKERRQ(ierr);
3577   } else {
3578     PetscInt ml,nl;
3579 
3580     M = *newmat;
3581     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3582     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3583     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3584     /*
3585          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3586        rather than the slower MatSetValues().
3587     */
3588     M->was_assembled = PETSC_TRUE;
3589     M->assembled     = PETSC_FALSE;
3590   }
3591   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3592   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3593   ii  = aij->i;
3594   jj  = aij->j;
3595   aa  = aij->a;
3596   for (i=0; i<m; i++) {
3597     row   = rstart + i;
3598     nz    = ii[i+1] - ii[i];
3599     cwork = jj;     jj += nz;
3600     vwork = aa;     aa += nz;
3601     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3602   }
3603 
3604   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3605   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606   *newmat = M;
3607 
3608   /* save submatrix used in processor for next request */
3609   if (call ==  MAT_INITIAL_MATRIX) {
3610     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3611     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3612   }
3613 
3614   PetscFunctionReturn(0);
3615 }
3616 
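/*
   Illustrative sketch (hypothetical helper, not part of PETSc): the caller-side pattern
   served by MatGetSubMatrix_MPIAIJ() and MatGetSubMatrix_MPIAIJ_Private() above --
   extract a parallel submatrix once with MAT_INITIAL_MATRIX, then refill its values with
   MAT_REUSE_MATRIX after the parent matrix has been changed numerically (same nonzero
   pattern).  The function name is made up for this example.
*/
static PetscErrorCode ExampleSubMatrixReuse_Sketch(Mat A,IS isrow,IS iscol)
{
  PetscErrorCode ierr;
  Mat            S;

  PetscFunctionBegin;
  /* first call creates S; the gathered column IS is composed on S for later reuse */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  /* ... modify the entries of A without changing its nonzero structure ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
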
3617 EXTERN_C_BEGIN
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3620 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3621 {
3622   PetscInt       m,cstart, cend,j,nnz,i,d;
3623   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3624   const PetscInt *JJ;
3625   PetscScalar    *values;
3626   PetscErrorCode ierr;
3627 
3628   PetscFunctionBegin;
3629   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3630 
3631   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3632   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3633   m      = B->rmap->n;
3634   cstart = B->cmap->rstart;
3635   cend   = B->cmap->rend;
3636   rstart = B->rmap->rstart;
3637 
3638   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3639 
3640 #if defined(PETSC_USE_DEBUG)
3641   for (i=0; i<m; i++) {
3642     nnz     = Ii[i+1]- Ii[i];
3643     JJ      = J + Ii[i];
3644     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3645     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3646     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3647   }
3648 #endif
3649 
3650   for (i=0; i<m; i++) {
3651     nnz     = Ii[i+1]- Ii[i];
3652     JJ      = J + Ii[i];
3653     nnz_max = PetscMax(nnz_max,nnz);
3654     d       = 0;
3655     for (j=0; j<nnz; j++) {
3656       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3657     }
3658     d_nnz[i] = d;
3659     o_nnz[i] = nnz - d;
3660   }
3661   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3662   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3663 
3664   if (v) values = (PetscScalar*)v;
3665   else {
3666     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3667     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3668   }
3669 
3670   for (i=0; i<m; i++) {
3671     ii   = i + rstart;
3672     nnz  = Ii[i+1]- Ii[i];
3673     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3674   }
3675   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3676   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3677 
3678   if (!v) {
3679     ierr = PetscFree(values);CHKERRQ(ierr);
3680   }
3681   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3682   PetscFunctionReturn(0);
3683 }
3684 EXTERN_C_END
3685 
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3688 /*@
3689    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3690    (the default parallel PETSc format).
3691 
3692    Collective on MPI_Comm
3693 
3694    Input Parameters:
3695 +  B - the matrix
3696 .  i - the indices into j for the start of each local row (starts with zero)
3697 .  j - the column indices for each local row (starts with zero)
3698 -  v - optional values in the matrix
3699 
3700    Level: developer
3701 
3702    Notes:
3703        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3704      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3705      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3706 
3707        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3708 
3709        The format used for the sparse matrix input is equivalent to a
3710     row-major ordering, i.e., for the following matrix, the input data expected is
3711     as shown:
3712 
3713         1 0 0
3714         2 0 3     P0
3715        -------
3716         4 5 6     P1
3717 
3718      Process0 [P0]: rows_owned=[0,1]
3719         i =  {0,1,3}  [size = nrow+1  = 2+1]
3720         j =  {0,0,2}  [size = nz = 3]
3721         v =  {1,2,3}  [size = nz = 3]
3722 
3723      Process1 [P1]: rows_owned=[2]
3724         i =  {0,3}    [size = nrow+1  = 1+1]
3725         j =  {0,1,2}  [size = nz = 3]
3726         v =  {4,5,6}  [size = nz = 3]
3727 
3728 .keywords: matrix, aij, compressed row, sparse, parallel
3729 
3730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3731           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3732 @*/
3733 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3734 {
3735   PetscErrorCode ierr;
3736 
3737   PetscFunctionBegin;
3738   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3739   PetscFunctionReturn(0);
3740 }
3741 
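/*
   Illustrative sketch (hypothetical, not part of the library): one possible way to feed the
   P0/P1 CSR data from the example in the MatMPIAIJSetPreallocationCSR() manual page above to
   this routine, assuming a communicator with exactly two processes.  The helper name and the
   hard-coded arrays exist only for this example.
*/
static PetscErrorCode ExampleSetPreallocationCSR_Sketch(MPI_Comm comm,Mat *B)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* rows 0-1, owned by process 0 */
  const PetscScalar v0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* row 2, owned by process 1 */
  const PetscScalar v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocates, inserts the values, and assembles the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
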
3742 #undef __FUNCT__
3743 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3744 /*@C
3745    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3746    (the default parallel PETSc format).  For good matrix assembly performance
3747    the user should preallocate the matrix storage by setting the parameters
3748    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3749    performance can be increased by more than a factor of 50.
3750 
3751    Collective on MPI_Comm
3752 
3753    Input Parameters:
3754 +  A - the matrix
3755 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3756            (same value is used for all local rows)
3757 .  d_nnz - array containing the number of nonzeros in the various rows of the
3758            DIAGONAL portion of the local submatrix (possibly different for each row)
3759            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3760            The size of this array is equal to the number of local rows, i.e 'm'.
3761            For matrices that will be factored, you must leave room for (and set)
3762            the diagonal entry even if it is zero.
3763 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3764            submatrix (same value is used for all local rows).
3765 -  o_nnz - array containing the number of nonzeros in the various rows of the
3766            OFF-DIAGONAL portion of the local submatrix (possibly different for
3767            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3768            structure. The size of this array is equal to the number
3769            of local rows, i.e 'm'.
3770 
3771    If the *_nnz parameter is given then the *_nz parameter is ignored
3772 
3773    The AIJ format (also called the Yale sparse matrix format or
3774    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3775    storage.  The stored row and column indices begin with zero.
3776    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3777 
3778    The parallel matrix is partitioned such that the first m0 rows belong to
3779    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3780    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3781 
3782    The DIAGONAL portion of the local submatrix of a processor can be defined
3783    as the submatrix which is obtained by extracting the part corresponding to
3784    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3785    first row that belongs to the processor, r2 is the last row belonging to
3786    this processor, and c1-c2 is the range of indices of the local part of a
3787    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3788    common case of a square matrix, the row and column ranges are the same and
3789    the DIAGONAL part is also square. The remaining portion of the local
3790    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3791 
3792    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3793 
3794    You can call MatGetInfo() to get information on how effective the preallocation was;
3795    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3796    You can also run with the option -info and look for messages with the string
3797    malloc in them to see if additional memory allocation was needed.
3798 
3799    Example usage:
3800 
3801    Consider the following 8x8 matrix with 34 non-zero values, that is
3802    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3803    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3804    as follows:
3805 
3806 .vb
3807             1  2  0  |  0  3  0  |  0  4
3808     Proc0   0  5  6  |  7  0  0  |  8  0
3809             9  0 10  | 11  0  0  | 12  0
3810     -------------------------------------
3811            13  0 14  | 15 16 17  |  0  0
3812     Proc1   0 18  0  | 19 20 21  |  0  0
3813             0  0  0  | 22 23  0  | 24  0
3814     -------------------------------------
3815     Proc2  25 26 27  |  0  0 28  | 29  0
3816            30  0  0  | 31 32 33  |  0 34
3817 .ve
3818 
3819    This can be represented as a collection of submatrices as:
3820 
3821 .vb
3822       A B C
3823       D E F
3824       G H I
3825 .ve
3826 
3827    Where the submatrices A,B,C are owned by proc0, D,E,F are
3828    owned by proc1, G,H,I are owned by proc2.
3829 
3830    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3831    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3832    The 'M','N' parameters are 8,8, and have the same values on all procs.
3833 
3834    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3835    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3836    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3837    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3838    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3839    matrix, and [DF] as another SeqAIJ matrix.
3840 
3841    When d_nz, o_nz parameters are specified, d_nz storage elements are
3842    allocated for every row of the local diagonal submatrix, and o_nz
3843    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3844    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3845    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3846    In this case, the values of d_nz,o_nz are:
3847 .vb
3848      proc0 : d_nz = 2, o_nz = 2
3849      proc1 : d_nz = 3, o_nz = 2
3850      proc2 : d_nz = 1, o_nz = 4
3851 .ve
3852    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3853    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3854    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3855    34 values.
3856 
3857    When d_nnz, o_nnz parameters are specified, the storage is specified
3858    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3859    In the above case the values for d_nnz,o_nnz are:
3860 .vb
3861      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3862      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3863      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3864 .ve
3865    Here the space allocated is the sum of all the above values, i.e., 34, and
3866    hence the preallocation is exact.
3867 
3868    Level: intermediate
3869 
3870 .keywords: matrix, aij, compressed row, sparse, parallel
3871 
3872 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3873           MPIAIJ, MatGetInfo()
3874 @*/
3875 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3876 {
3877   PetscErrorCode ierr;
3878 
3879   PetscFunctionBegin;
3880   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3881   PetscValidType(B,1);
3882   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3883   PetscFunctionReturn(0);
3884 }
3885 
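/*
   Illustrative sketch (hypothetical helper, not a PETSc routine): applying the d_nnz/o_nnz
   values from the 8x8 example in the MatMPIAIJSetPreallocation() manual page above, assuming
   a communicator with exactly three processes.
*/
static PetscErrorCode ExamplePreallocate8x8_Sketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  const PetscInt m[3]       = {3,3,2};                     /* local rows on proc0,proc1,proc2 */
  const PetscInt dnnz[3][3] = {{2,2,2},{3,3,2},{1,1,0}};   /* DIAGONAL block row lengths      */
  const PetscInt onnz[3][3] = {{2,2,2},{2,1,1},{4,4,0}};   /* OFF-DIAGONAL block row lengths  */

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m[rank],m[rank],8,8);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* d_nz and o_nz (here 0) are ignored because d_nnz and o_nnz are provided */
  ierr = MatMPIAIJSetPreallocation(*A,0,dnnz[rank],0,onnz[rank]);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
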
3886 #undef __FUNCT__
3887 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3888 /*@
3889      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3890          CSR format the local rows.
3891 
3892    Collective on MPI_Comm
3893 
3894    Input Parameters:
3895 +  comm - MPI communicator
3896 .  m - number of local rows (Cannot be PETSC_DECIDE)
3897 .  n - This value should be the same as the local size used in creating the
3898        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3899        calculated if N is given) For square matrices n is almost always m.
3900 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3901 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3902 .   i - row indices
3903 .   j - column indices
3904 -   a - matrix values
3905 
3906    Output Parameter:
3907 .   mat - the matrix
3908 
3909    Level: intermediate
3910 
3911    Notes:
3912        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3913      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3914      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3915 
3916        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3917 
3918        The format used for the sparse matrix input is equivalent to a
3919     row-major ordering, i.e., for the following matrix, the input data expected is
3920     as shown:
3921 
3922         1 0 0
3923         2 0 3     P0
3924        -------
3925         4 5 6     P1
3926 
3927      Process0 [P0]: rows_owned=[0,1]
3928         i =  {0,1,3}  [size = nrow+1  = 2+1]
3929         j =  {0,0,2}  [size = nz = 3]
3930         v =  {1,2,3}  [size = nz = 3]
3931 
3932      Process1 [P1]: rows_owned=[2]
3933         i =  {0,3}    [size = nrow+1  = 1+1]
3934         j =  {0,1,2}  [size = nz = 3]
3935         v =  {4,5,6}  [size = nz = 3]
3936 
3937 .keywords: matrix, aij, compressed row, sparse, parallel
3938 
3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3940           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3941 @*/
3942 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3943 {
3944   PetscErrorCode ierr;
3945 
3946   PetscFunctionBegin;
3947   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3948   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3949   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3950   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3951   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3952   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3953   PetscFunctionReturn(0);
3954 }
3955 
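/*
   Illustrative sketch (hypothetical, not part of PETSc): creating the same 3x3 example matrix
   as in the manual page above with a single call to MatCreateMPIAIJWithArrays(), again
   assuming exactly two processes in comm.  The arrays are copied, so the caller may free or
   reuse them afterwards.
*/
static PetscErrorCode ExampleCreateWithArrays_Sketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2}, i1[] = {0,3}, j1[] = {0,1,2};
  const PetscScalar a0[] = {1,2,3},                 a1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,a0,A);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,a1,A);CHKERRQ(ierr);
  }
  /* the returned matrix is already assembled */
  PetscFunctionReturn(0);
}
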
3956 #undef __FUNCT__
3957 #define __FUNCT__ "MatCreateAIJ"
3958 /*@C
3959    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3960    (the default parallel PETSc format).  For good matrix assembly performance
3961    the user should preallocate the matrix storage by setting the parameters
3962    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3963    performance can be increased by more than a factor of 50.
3964 
3965    Collective on MPI_Comm
3966 
3967    Input Parameters:
3968 +  comm - MPI communicator
3969 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3970            This value should be the same as the local size used in creating the
3971            y vector for the matrix-vector product y = Ax.
3972 .  n - This value should be the same as the local size used in creating the
3973        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3974        calculated if N is given) For square matrices n is almost always m.
3975 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3976 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3977 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3978            (same value is used for all local rows)
3979 .  d_nnz - array containing the number of nonzeros in the various rows of the
3980            DIAGONAL portion of the local submatrix (possibly different for each row)
3981            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3982            The size of this array is equal to the number of local rows, i.e 'm'.
3983 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3984            submatrix (same value is used for all local rows).
3985 -  o_nnz - array containing the number of nonzeros in the various rows of the
3986            OFF-DIAGONAL portion of the local submatrix (possibly different for
3987            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3988            structure. The size of this array is equal to the number
3989            of local rows, i.e 'm'.
3990 
3991    Output Parameter:
3992 .  A - the matrix
3993 
3994    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3995    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3996    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3997 
3998    Notes:
3999    If the *_nnz parameter is given then the *_nz parameter is ignored
4000 
4001    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4002    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4003    storage requirements for this matrix.
4004 
4005    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4006    processor then it must be used on all processors that share the object for
4007    that argument.
4008 
4009    The user MUST specify either the local or global matrix dimensions
4010    (possibly both).
4011 
4012    The parallel matrix is partitioned across processors such that the
4013    first m0 rows belong to process 0, the next m1 rows belong to
4014    process 1, the next m2 rows belong to process 2, etc., where
4015    m0,m1,m2,... are the values of the input parameter 'm' on each process, i.e. each
4016    processor stores values corresponding to an [m x N] submatrix.
4017 
4018    The columns are logically partitioned with the first n0 columns belonging
4019    to the 0th partition, the next n1 columns belonging to the next
4020    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4021 
4022    The DIAGONAL portion of the local submatrix on any given processor
4023    is the submatrix corresponding to the m rows and n columns owned by
4024    that processor, i.e. the diagonal submatrix on
4025    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4026    etc. The remaining [m x (N-n)] portion of the local submatrix
4027    constitutes the OFF-DIAGONAL portion. The example below better
4028    illustrates this concept.
4029 
4030    For a square global matrix we define each processor's diagonal portion
4031    to be its local rows and the corresponding columns (a square submatrix);
4032    each processor's off-diagonal portion encompasses the remainder of the
4033    local matrix (a rectangular submatrix).
4034 
4035    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4036 
4037    When calling this routine with a single process communicator, a matrix of
4038    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4039    type of communicator, use the construction mechanism:
4040      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4041 
4042    By default, this format uses inodes (identical nodes) when possible.
4043    We search for consecutive rows with the same nonzero structure, thereby
4044    reusing matrix information to achieve increased efficiency.
4045 
4046    Options Database Keys:
4047 +  -mat_no_inode  - Do not use inodes
4048 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4049 -  -mat_aij_oneindex - Internally use indexing starting at 1
4050         rather than 0.  Note that when calling MatSetValues(),
4051         the user still MUST index entries starting at 0!
4052 
4053 
4054    Example usage:
4055 
4056    Consider the following 8x8 matrix with 34 non-zero values that is
4057    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4058    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4059    as follows:
4060 
4061 .vb
4062             1  2  0  |  0  3  0  |  0  4
4063     Proc0   0  5  6  |  7  0  0  |  8  0
4064             9  0 10  | 11  0  0  | 12  0
4065     -------------------------------------
4066            13  0 14  | 15 16 17  |  0  0
4067     Proc1   0 18  0  | 19 20 21  |  0  0
4068             0  0  0  | 22 23  0  | 24  0
4069     -------------------------------------
4070     Proc2  25 26 27  |  0  0 28  | 29  0
4071            30  0  0  | 31 32 33  |  0 34
4072 .ve
4073 
4074    This can be represented as a collection of submatrices as:
4075 
4076 .vb
4077       A B C
4078       D E F
4079       G H I
4080 .ve
4081 
4082    Where the submatrices A,B,C are owned by proc0, D,E,F are
4083    owned by proc1, G,H,I are owned by proc2.
4084 
4085    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4086    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4087    The 'M','N' parameters are 8,8, and have the same values on all procs.
4088 
4089    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4090    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4091    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4092    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4093    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4094    matrix, and [DF] as another SeqAIJ matrix.
4095 
4096    When d_nz, o_nz parameters are specified, d_nz storage elements are
4097    allocated for every row of the local diagonal submatrix, and o_nz
4098    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4099    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4100    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4101    In this case, the values of d_nz,o_nz are:
4102 .vb
4103      proc0 : d_nz = 2, o_nz = 2
4104      proc1 : d_nz = 3, o_nz = 2
4105      proc2 : d_nz = 1, o_nz = 4
4106 .ve
4107    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4108    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4109    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4110    34 values.
4111 
4112    When d_nnz, o_nnz parameters are specified, the storage is specified
4113    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4114    In the above case the values for d_nnz,o_nnz are:
4115 .vb
4116      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4117      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4118      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4119 .ve
4120    Here the space allocated is the sum of all the above values, i.e. 34, and
4121    hence the preallocation is perfect.
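   A minimal call sketch using the d_nnz/o_nnz arrays above (dnnz and onnz are
   illustrative names for the arrays each process fills in for its own rows, and
   m,n are that process's local sizes, e.g. 3,3 on proc0 and proc1, 2,2 on proc2):
.vb
     Mat A;
     ierr = MatCreateAIJ(comm,m,n,8,8,0,dnnz,0,onnz,&A);CHKERRQ(ierr);
.ve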
4122 
4123    Level: intermediate
4124 
4125 .keywords: matrix, aij, compressed row, sparse, parallel
4126 
4127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4128           MPIAIJ, MatCreateMPIAIJWithArrays()
4129 @*/
4130 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4131 {
4132   PetscErrorCode ierr;
4133   PetscMPIInt    size;
4134 
4135   PetscFunctionBegin;
4136   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4137   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4139   if (size > 1) {
4140     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4141     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4142   } else {
4143     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4144     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4145   }
4146   PetscFunctionReturn(0);
4147 }
4148 
4149 #undef __FUNCT__
4150 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
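/* Returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an MPIAIJ matrix,
   and the map (colmap) from Ao's local column indices to global column indices */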
4151 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4152 {
4153   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4154 
4155   PetscFunctionBegin;
4156   *Ad     = a->A;
4157   *Ao     = a->B;
4158   *colmap = a->garray;
4159   PetscFunctionReturn(0);
4160 }
4161 
4162 #undef __FUNCT__
4163 #define __FUNCT__ "MatSetColoring_MPIAIJ"
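/* Applies the given coloring to both the diagonal and off-diagonal SeqAIJ blocks,
   translating the global (or ghosted) color indices to each block's local column numbering */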
4164 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4165 {
4166   PetscErrorCode ierr;
4167   PetscInt       i;
4168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4169 
4170   PetscFunctionBegin;
4171   if (coloring->ctype == IS_COLORING_GLOBAL) {
4172     ISColoringValue *allcolors,*colors;
4173     ISColoring      ocoloring;
4174 
4175     /* set coloring for diagonal portion */
4176     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4177 
4178     /* set coloring for off-diagonal portion */
4179     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4180     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4181     for (i=0; i<a->B->cmap->n; i++) {
4182       colors[i] = allcolors[a->garray[i]];
4183     }
4184     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4185     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4186     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4187     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4188   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4189     ISColoringValue *colors;
4190     PetscInt        *larray;
4191     ISColoring      ocoloring;
4192 
4193     /* set coloring for diagonal portion */
4194     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4195     for (i=0; i<a->A->cmap->n; i++) {
4196       larray[i] = i + A->cmap->rstart;
4197     }
4198     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4199     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4200     for (i=0; i<a->A->cmap->n; i++) {
4201       colors[i] = coloring->colors[larray[i]];
4202     }
4203     ierr = PetscFree(larray);CHKERRQ(ierr);
4204     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4205     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4206     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4207 
4208     /* set coloring for off-diagonal portion */
4209     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4210     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4211     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4212     for (i=0; i<a->B->cmap->n; i++) {
4213       colors[i] = coloring->colors[larray[i]];
4214     }
4215     ierr = PetscFree(larray);CHKERRQ(ierr);
4216     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4217     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4218     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4219   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4220 
4221   PetscFunctionReturn(0);
4222 }
4223 
4224 #if defined(PETSC_HAVE_ADIC)
4225 #undef __FUNCT__
4226 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4227 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4228 {
4229   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4230   PetscErrorCode ierr;
4231 
4232   PetscFunctionBegin;
4233   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4234   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4235   PetscFunctionReturn(0);
4236 }
4237 #endif
4238 
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4241 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4242 {
4243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4244   PetscErrorCode ierr;
4245 
4246   PetscFunctionBegin;
4247   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4248   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 #undef __FUNCT__
4253 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
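/* Symbolic phase of MatCreateMPIAIJConcatenateSeqAIJ(): determines the row ownership and
   preallocation of the concatenated MPIAIJ matrix from each process's sequential matrix */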
4254 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4255 {
4256   PetscErrorCode ierr;
4257   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
4258   PetscInt       *indx;
4259 
4260   PetscFunctionBegin;
4261   /* This routine will ONLY return MPIAIJ type matrix */
4262   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4263   if (n == PETSC_DECIDE){
4264     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4265   }
4266   /* Check sum(n) = N */
4267   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4268   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4269 
4270   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4271   rstart -= m;
4272 
4273   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4274   for (i=0;i<m;i++) {
4275     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4276     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4277     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4278   }
4279 
4280   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4281   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4282   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4283   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4284   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4285   PetscFunctionReturn(0);
4286 }
4287 
4288 #undef __FUNCT__
4289 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
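/* Numeric phase of MatCreateMPIAIJConcatenateSeqAIJ(): copies each process's sequential rows
   into the already preallocated parallel matrix and assembles it */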
4290 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4291 {
4292   PetscErrorCode ierr;
4293   PetscInt       m,N,i,rstart,nnz,Ii;
4294   PetscInt       *indx;
4295   PetscScalar    *values;
4296 
4297   PetscFunctionBegin;
4298   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4299   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4300   for (i=0;i<m;i++) {
4301     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4302     Ii    = i + rstart;
4303     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4304     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4305   }
4306   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4307   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #undef __FUNCT__
4312 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4313 /*@
4314       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4315                  matrices from each processor
4316 
4317     Collective on MPI_Comm
4318 
4319    Input Parameters:
4320 +    comm - the communicator the parallel matrix will live on
4321 .    inmat - the input sequential matrices
4322 .    n - number of local columns (or PETSC_DECIDE)
4323 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4324 
4325    Output Parameter:
4326 .    outmat - the parallel matrix generated
4327 
4328     Level: advanced
4329 
4330    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4331 
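   Example usage (a minimal sketch; seqA is the sequential matrix built by this process):
.vb
     Mat A;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve
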
4332 @*/
4333 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4334 {
4335   PetscErrorCode ierr;
4336 
4337   PetscFunctionBegin;
4338   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4339   if (scall == MAT_INITIAL_MATRIX){
4340     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4341   }
4342   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4343   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatFileSplit"
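/* Writes each process's local rows of A, as a sequential matrix over the full global column space,
   to the binary file <outfile>.<rank> */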
4349 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4350 {
4351   PetscErrorCode    ierr;
4352   PetscMPIInt       rank;
4353   PetscInt          m,N,i,rstart,nnz;
4354   size_t            len;
4355   const PetscInt    *indx;
4356   PetscViewer       out;
4357   char              *name;
4358   Mat               B;
4359   const PetscScalar *values;
4360 
4361   PetscFunctionBegin;
4362   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4363   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4364   /* Should this be the type of the diagonal block of A? */
4365   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4366   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4367   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4368   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4369   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4370   for (i=0;i<m;i++) {
4371     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4372     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4373     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4374   }
4375   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4376   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377 
4378   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4379   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4380   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4381   sprintf(name,"%s.%d",outfile,rank);
4382   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4383   ierr = PetscFree(name);CHKERRQ(ierr);
4384   ierr = MatView(B,out);CHKERRQ(ierr);
4385   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4386   ierr = MatDestroy(&B);CHKERRQ(ierr);
4387   PetscFunctionReturn(0);
4388 }
4389 
4390 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4391 #undef __FUNCT__
4392 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4393 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4394 {
4395   PetscErrorCode       ierr;
4396   Mat_Merge_SeqsToMPI  *merge;
4397   PetscContainer       container;
4398 
4399   PetscFunctionBegin;
4400   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4401   if (container) {
4402     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4403     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4404     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4405     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4406     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4407     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4408     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4409     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4410     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4415     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4416     ierr = PetscFree(merge);CHKERRQ(ierr);
4417     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4418   }
4419   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 #include <../src/mat/utils/freespace.h>
4424 #include <petscbt.h>
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
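/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): communicates the values of the per-process
   sequential matrices and sums them into the MPIAIJ matrix created by the symbolic phase */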
4428 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4429 {
4430   PetscErrorCode       ierr;
4431   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4432   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4433   PetscMPIInt          size,rank,taga,*len_s;
4434   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4435   PetscInt             proc,m;
4436   PetscInt             **buf_ri,**buf_rj;
4437   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4438   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4439   MPI_Request          *s_waits,*r_waits;
4440   MPI_Status           *status;
4441   MatScalar            *aa=a->a;
4442   MatScalar            **abuf_r,*ba_i;
4443   Mat_Merge_SeqsToMPI  *merge;
4444   PetscContainer       container;
4445 
4446   PetscFunctionBegin;
4447   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4448 
4449   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4450   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4451 
4452   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4453   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4454 
4455   bi     = merge->bi;
4456   bj     = merge->bj;
4457   buf_ri = merge->buf_ri;
4458   buf_rj = merge->buf_rj;
4459 
4460   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4461   owners = merge->rowmap->range;
4462   len_s  = merge->len_s;
4463 
4464   /* send and recv matrix values */
4465   /*-----------------------------*/
4466   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4467   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4468 
4469   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4470   for (proc=0,k=0; proc<size; proc++){
4471     if (!len_s[proc]) continue;
4472     i = owners[proc];
4473     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4474     k++;
4475   }
4476 
4477   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4478   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4479   ierr = PetscFree(status);CHKERRQ(ierr);
4480 
4481   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4482   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4483 
4484   /* insert mat values of mpimat */
4485   /*----------------------------*/
4486   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4487   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4488 
4489   for (k=0; k<merge->nrecv; k++){
4490     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4491     nrows = *(buf_ri_k[k]);
4492     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4493     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4494   }
4495 
4496   /* set values of ba */
4497   m = merge->rowmap->n;
4498   for (i=0; i<m; i++) {
4499     arow = owners[rank] + i;
4500     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4501     bnzi = bi[i+1] - bi[i];
4502     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4503 
4504     /* add local non-zero vals of this proc's seqmat into ba */
4505     anzi = ai[arow+1] - ai[arow];
4506     aj   = a->j + ai[arow];
4507     aa   = a->a + ai[arow];
4508     nextaj = 0;
4509     for (j=0; nextaj<anzi; j++){
4510       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4511         ba_i[j] += aa[nextaj++];
4512       }
4513     }
4514 
4515     /* add received vals into ba */
4516     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4517       /* i-th row */
4518       if (i == *nextrow[k]) {
4519         anzi = *(nextai[k]+1) - *nextai[k];
4520         aj   = buf_rj[k] + *(nextai[k]);
4521         aa   = abuf_r[k] + *(nextai[k]);
4522         nextaj = 0;
4523         for (j=0; nextaj<anzi; j++){
4524           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4525             ba_i[j] += aa[nextaj++];
4526           }
4527         }
4528         nextrow[k]++; nextai[k]++;
4529       }
4530     }
4531     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4532   }
4533   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4534   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4535 
4536   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4537   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4538   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4539   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4540   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4541   PetscFunctionReturn(0);
4542 }
4543 
4544 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4545 
4546 #undef __FUNCT__
4547 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
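/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): computes the nonzero structure of the sum of the
   per-process sequential matrices, preallocates the MPIAIJ matrix, and attaches the
   Mat_Merge_SeqsToMPI support structure used later by the numeric phase */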
4548 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4549 {
4550   PetscErrorCode       ierr;
4551   Mat                  B_mpi;
4552   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4553   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4554   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4555   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4556   PetscInt             len,proc,*dnz,*onz;
4557   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4558   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4559   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4560   MPI_Status           *status;
4561   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4562   PetscBT              lnkbt;
4563   Mat_Merge_SeqsToMPI  *merge;
4564   PetscContainer       container;
4565 
4566   PetscFunctionBegin;
4567   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4568 
4569   /* make sure it is a PETSc comm */
4570   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4572   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4573 
4574   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4575   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4576 
4577   /* determine row ownership */
4578   /*---------------------------------------------------------*/
4579   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4580   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4581   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4582   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4583   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4584   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4585   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4586 
4587   m      = merge->rowmap->n;
4588   M      = merge->rowmap->N;
4589   owners = merge->rowmap->range;
4590 
4591   /* determine the number of messages to send, their lengths */
4592   /*---------------------------------------------------------*/
4593   len_s  = merge->len_s;
4594 
4595   len = 0;  /* length of buf_si[] */
4596   merge->nsend = 0;
4597   for (proc=0; proc<size; proc++){
4598     len_si[proc] = 0;
4599     if (proc == rank){
4600       len_s[proc] = 0;
4601     } else {
4602       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4603       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4604     }
4605     if (len_s[proc]) {
4606       merge->nsend++;
4607       nrows = 0;
4608       for (i=owners[proc]; i<owners[proc+1]; i++){
4609         if (ai[i+1] > ai[i]) nrows++;
4610       }
4611       len_si[proc] = 2*(nrows+1);
4612       len += len_si[proc];
4613     }
4614   }
4615 
4616   /* determine the number and length of messages to receive for ij-structure */
4617   /*-------------------------------------------------------------------------*/
4618   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4619   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4620 
4621   /* post the Irecv of j-structure */
4622   /*-------------------------------*/
4623   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4624   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4625 
4626   /* post the Isend of j-structure */
4627   /*--------------------------------*/
4628   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4629 
4630   for (proc=0, k=0; proc<size; proc++){
4631     if (!len_s[proc]) continue;
4632     i = owners[proc];
4633     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4634     k++;
4635   }
4636 
4637   /* receives and sends of j-structure are complete */
4638   /*------------------------------------------------*/
4639   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4640   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4641 
4642   /* send and recv i-structure */
4643   /*---------------------------*/
4644   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4645   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4646 
4647   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4648   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4649   for (proc=0,k=0; proc<size; proc++){
4650     if (!len_s[proc]) continue;
4651     /* form outgoing message for i-structure:
4652          buf_si[0]:                 nrows to be sent
4653                [1:nrows]:           row index (global)
4654                [nrows+1:2*nrows+1]: i-structure index
4655     */
4656     /*-------------------------------------------*/
4657     nrows = len_si[proc]/2 - 1;
4658     buf_si_i    = buf_si + nrows+1;
4659     buf_si[0]   = nrows;
4660     buf_si_i[0] = 0;
4661     nrows = 0;
4662     for (i=owners[proc]; i<owners[proc+1]; i++){
4663       anzi = ai[i+1] - ai[i];
4664       if (anzi) {
4665         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4666         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4667         nrows++;
4668       }
4669     }
4670     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4671     k++;
4672     buf_si += len_si[proc];
4673   }
4674 
4675   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4676   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4677 
4678   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4679   for (i=0; i<merge->nrecv; i++){
4680     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4681   }
4682 
4683   ierr = PetscFree(len_si);CHKERRQ(ierr);
4684   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4685   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4686   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4687   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4688   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4689   ierr = PetscFree(status);CHKERRQ(ierr);
4690 
4691   /* compute a local seq matrix in each processor */
4692   /*----------------------------------------------*/
4693   /* allocate bi array and free space for accumulating nonzero column info */
4694   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4695   bi[0] = 0;
4696 
4697   /* create and initialize a linked list */
4698   nlnk = N+1;
4699   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4700 
4701   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4702   len = 0;
4703   len  = ai[owners[rank+1]] - ai[owners[rank]];
4704   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4705   current_space = free_space;
4706 
4707   /* determine symbolic info for each local row */
4708   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4709 
4710   for (k=0; k<merge->nrecv; k++){
4711     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4712     nrows = *buf_ri_k[k];
4713     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4714     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4715   }
4716 
4717   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4718   len = 0;
4719   for (i=0;i<m;i++) {
4720     bnzi   = 0;
4721     /* add local non-zero cols of this proc's seqmat into lnk */
4722     arow   = owners[rank] + i;
4723     anzi   = ai[arow+1] - ai[arow];
4724     aj     = a->j + ai[arow];
4725     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4726     bnzi += nlnk;
4727     /* add received col data into lnk */
4728     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4729       if (i == *nextrow[k]) { /* i-th row */
4730         anzi = *(nextai[k]+1) - *nextai[k];
4731         aj   = buf_rj[k] + *nextai[k];
4732         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4733         bnzi += nlnk;
4734         nextrow[k]++; nextai[k]++;
4735       }
4736     }
4737     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4738 
4739     /* if free space is not available, make more free space */
4740     if (current_space->local_remaining<bnzi) {
4741       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4742       nspacedouble++;
4743     }
4744     /* copy data into free space, then initialize lnk */
4745     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4746     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4747 
4748     current_space->array           += bnzi;
4749     current_space->local_used      += bnzi;
4750     current_space->local_remaining -= bnzi;
4751 
4752     bi[i+1] = bi[i] + bnzi;
4753   }
4754 
4755   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4756 
4757   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4758   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4759   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4760 
4761   /* create symbolic parallel matrix B_mpi */
4762   /*---------------------------------------*/
4763   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4764   if (n==PETSC_DECIDE) {
4765     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4766   } else {
4767     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4768   }
4769   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4770   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4771   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4772   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4773 
4774   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4775   B_mpi->assembled     = PETSC_FALSE;
4776   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4777   merge->bi            = bi;
4778   merge->bj            = bj;
4779   merge->buf_ri        = buf_ri;
4780   merge->buf_rj        = buf_rj;
4781   merge->coi           = PETSC_NULL;
4782   merge->coj           = PETSC_NULL;
4783   merge->owners_co     = PETSC_NULL;
4784 
4785   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4786 
4787   /* attach the supporting struct to B_mpi for reuse */
4788   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4789   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4790   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4791   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4792   *mpimat = B_mpi;
4793 
4794   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4795   PetscFunctionReturn(0);
4796 }
4797 
4798 #undef __FUNCT__
4799 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4800 /*@C
4801       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4802                  matrices from each processor
4803 
4804     Collective on MPI_Comm
4805 
4806    Input Parameters:
4807 +    comm - the communicator the parallel matrix will live on
4808 .    seqmat - the input sequential matrices
4809 .    m - number of local rows (or PETSC_DECIDE)
4810 .    n - number of local columns (or PETSC_DECIDE)
4811 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4812 
4813    Output Parameter:
4814 .    mpimat - the parallel matrix generated
4815 
4816     Level: advanced
4817 
4818    Notes:
4819      The dimensions of the sequential matrix in each processor MUST be the same.
4820      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4821      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
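   Example usage (a minimal sketch; seqB is the sequential matrix contributed by this process):
.vb
     Mat A;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqB,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve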
4822 @*/
4823 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4824 {
4825   PetscErrorCode   ierr;
4826   PetscMPIInt     size;
4827 
4828   PetscFunctionBegin;
4829   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4830   if (size == 1){
4831      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4832      if (scall == MAT_INITIAL_MATRIX){
4833        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4834      } else {
4835        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4836      }
4837      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4838      PetscFunctionReturn(0);
4839   }
4840   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841   if (scall == MAT_INITIAL_MATRIX){
4842     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4843   }
4844   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4845   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4846   PetscFunctionReturn(0);
4847 }
4848 
4849 #undef __FUNCT__
4850 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4851 /*@
4852      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4853           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4854           with MatGetSize()
4855 
4856     Not Collective
4857 
4858    Input Parameters:
4859 +    A - the matrix
4860 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4861 
4862    Output Parameter:
4863 .    A_loc - the local sequential matrix generated
4864 
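   Example usage (a minimal sketch):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... work with Aloc ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
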
4865     Level: developer
4866 
4867 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4868 
4869 @*/
4870 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4871 {
4872   PetscErrorCode  ierr;
4873   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4874   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4875   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4876   MatScalar       *aa=a->a,*ba=b->a,*cam;
4877   PetscScalar     *ca;
4878   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4879   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4880   PetscBool       match;
4881 
4882   PetscFunctionBegin;
4883   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4884   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4885   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4886   if (scall == MAT_INITIAL_MATRIX){
4887     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4888     ci[0] = 0;
4889     for (i=0; i<am; i++){
4890       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4891     }
4892     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4893     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4894     k = 0;
4895     for (i=0; i<am; i++) {
4896       ncols_o = bi[i+1] - bi[i];
4897       ncols_d = ai[i+1] - ai[i];
4898       /* off-diagonal portion of A */
4899       for (jo=0; jo<ncols_o; jo++) {
4900         col = cmap[*bj];
4901         if (col >= cstart) break;
4902         cj[k]   = col; bj++;
4903         ca[k++] = *ba++;
4904       }
4905       /* diagonal portion of A */
4906       for (j=0; j<ncols_d; j++) {
4907         cj[k]   = cstart + *aj++;
4908         ca[k++] = *aa++;
4909       }
4910       /* off-diagonal portion of A */
4911       for (j=jo; j<ncols_o; j++) {
4912         cj[k]   = cmap[*bj++];
4913         ca[k++] = *ba++;
4914       }
4915     }
4916     /* put together the new matrix */
4917     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4918     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4919     /* Since these are PETSc arrays, change flags to free them as necessary. */
4920     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4921     mat->free_a  = PETSC_TRUE;
4922     mat->free_ij = PETSC_TRUE;
4923     mat->nonew   = 0;
4924   } else if (scall == MAT_REUSE_MATRIX){
4925     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4926     ci = mat->i; cj = mat->j; cam = mat->a;
4927     for (i=0; i<am; i++) {
4928       /* off-diagonal portion of A */
4929       ncols_o = bi[i+1] - bi[i];
4930       for (jo=0; jo<ncols_o; jo++) {
4931         col = cmap[*bj];
4932         if (col >= cstart) break;
4933         *cam++ = *ba++; bj++;
4934       }
4935       /* diagonal portion of A */
4936       ncols_d = ai[i+1] - ai[i];
4937       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4938       /* off-diagonal portion of A */
4939       for (j=jo; j<ncols_o; j++) {
4940         *cam++ = *ba++; bj++;
4941       }
4942     }
4943   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4944   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4950 /*@C
4951      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4952 
4953     Not Collective
4954 
4955    Input Parameters:
4956 +    A - the matrix
4957 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4959 
4960    Output Parameter:
4961 .    A_loc - the local sequential matrix generated
4962 
4963     Level: developer
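   Example usage (a minimal sketch; passing PETSC_NULL selects all local rows and all nonzero columns):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
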
4964 
4965 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4966 
4967 @*/
4968 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4969 {
4970   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4971   PetscErrorCode    ierr;
4972   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4973   IS                isrowa,iscola;
4974   Mat               *aloc;
4975   PetscBool       match;
4976 
4977   PetscFunctionBegin;
4978   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4979   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4980   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4981   if (!row){
4982     start = A->rmap->rstart; end = A->rmap->rend;
4983     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4984   } else {
4985     isrowa = *row;
4986   }
4987   if (!col){
4988     start = A->cmap->rstart;
4989     cmap  = a->garray;
4990     nzA   = a->A->cmap->n;
4991     nzB   = a->B->cmap->n;
4992     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4993     ncols = 0;
4994     for (i=0; i<nzB; i++) {
4995       if (cmap[i] < start) idx[ncols++] = cmap[i];
4996       else break;
4997     }
4998     imark = i;
4999     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5000     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5001     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5002   } else {
5003     iscola = *col;
5004   }
5005   if (scall != MAT_INITIAL_MATRIX){
5006     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5007     aloc[0] = *A_loc;
5008   }
5009   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5010   *A_loc = aloc[0];
5011   ierr = PetscFree(aloc);CHKERRQ(ierr);
5012   if (!row){
5013     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5014   }
5015   if (!col){
5016     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5017   }
5018   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatGetBrowsOfAcols"
5024 /*@C
5025     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5026 
5027     Collective on Mat
5028 
5029    Input Parameters:
5030 +    A,B - the matrices in mpiaij format
5031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5033 
5034    Output Parameter:
5035 +    rowb, colb - index sets of rows and columns of B to extract
5036 -    B_seq - the sequential matrix generated
5037 
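   Example usage (a minimal sketch; on the first call the index sets and B_seq are created and can be
   reused in subsequent calls with MAT_REUSE_MATRIX):
.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat Bseq = PETSC_NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
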
5038     Level: developer
5039 
5040 @*/
5041 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5042 {
5043   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5044   PetscErrorCode    ierr;
5045   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5046   IS                isrowb,iscolb;
5047   Mat               *bseq=PETSC_NULL;
5048 
5049   PetscFunctionBegin;
5050   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5051     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5052   }
5053   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5054 
5055   if (scall == MAT_INITIAL_MATRIX){
5056     start = A->cmap->rstart;
5057     cmap  = a->garray;
5058     nzA   = a->A->cmap->n;
5059     nzB   = a->B->cmap->n;
5060     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5061     ncols = 0;
5062     for (i=0; i<nzB; i++) {  /* row < local row index */
5063       if (cmap[i] < start) idx[ncols++] = cmap[i];
5064       else break;
5065     }
5066     imark = i;
5067     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5068     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5069     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5070     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5071   } else {
5072     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5073     isrowb = *rowb; iscolb = *colb;
5074     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5075     bseq[0] = *B_seq;
5076   }
5077   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5078   *B_seq = bseq[0];
5079   ierr = PetscFree(bseq);CHKERRQ(ierr);
5080   if (!rowb){
5081     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5082   } else {
5083     *rowb = isrowb;
5084   }
5085   if (!colb){
5086     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5087   } else {
5088     *colb = iscolb;
5089   }
5090   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5096 /*
5097     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5098     of the OFF-DIAGONAL portion of local A
5099 
5100     Collective on Mat
5101 
5102    Input Parameters:
5103 +    A,B - the matrices in mpiaij format
5104 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5105 
5106    Output Parameter:
5107 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5108 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5109 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5110 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5111 
5112     Level: developer
5113 
5114 */
5115 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5116 {
5117   VecScatter_MPI_General *gen_to,*gen_from;
5118   PetscErrorCode         ierr;
5119   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5120   Mat_SeqAIJ             *b_oth;
5121   VecScatter             ctx=a->Mvctx;
5122   MPI_Comm               comm=((PetscObject)ctx)->comm;
5123   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5124   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5125   PetscScalar            *rvalues,*svalues;
5126   MatScalar              *b_otha,*bufa,*bufA;
5127   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5128   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5129   MPI_Status             *sstatus,rstatus;
5130   PetscMPIInt            jj;
5131   PetscInt               *cols,sbs,rbs;
5132   PetscScalar            *vals;
5133 
5134   PetscFunctionBegin;
5135   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5136     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5137   }
5138   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5139   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5140 
5141   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5142   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5143   rvalues  = gen_from->values; /* holds the length of receiving row */
5144   svalues  = gen_to->values;   /* holds the length of sending row */
5145   nrecvs   = gen_from->n;
5146   nsends   = gen_to->n;
5147 
5148   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5149   srow     = gen_to->indices;   /* local row index to be sent */
5150   sstarts  = gen_to->starts;
5151   sprocs   = gen_to->procs;
5152   sstatus  = gen_to->sstatus;
5153   sbs      = gen_to->bs;
5154   rstarts  = gen_from->starts;
5155   rprocs   = gen_from->procs;
5156   rbs      = gen_from->bs;
5157 
5158   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5159   if (scall == MAT_INITIAL_MATRIX){
5160     /* i-array */
5161     /*---------*/
5162     /*  post receives */
5163     for (i=0; i<nrecvs; i++){
5164       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5165       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5166       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5167     }
5168 
5169     /* pack the outgoing message */
5170     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5171     sstartsj[0] = 0;  rstartsj[0] = 0;
5172     len = 0; /* total length of j or a array to be sent */
5173     k = 0;
5174     for (i=0; i<nsends; i++){
5175       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5176       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5177       for (j=0; j<nrows; j++) {
5178         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5179         for (l=0; l<sbs; l++){
5180           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5181           rowlen[j*sbs+l] = ncols;
5182           len += ncols;
5183           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5184         }
5185         k++;
5186       }
5187       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5188       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5189     }
5190     /* recvs and sends of i-array are completed */
5191     i = nrecvs;
5192     while (i--) {
5193       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5194     }
5195     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5196 
5197     /* allocate buffers for sending j and a arrays */
5198     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5199     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5200 
5201     /* create i-array of B_oth */
5202     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5203     b_othi[0] = 0;
5204     len = 0; /* total length of j or a array to be received */
5205     k = 0;
5206     for (i=0; i<nrecvs; i++){
5207       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5208       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5209       for (j=0; j<nrows; j++) {
5210         b_othi[k+1] = b_othi[k] + rowlen[j];
5211         len += rowlen[j]; k++;
5212       }
5213       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5214     }
5215 
5216     /* allocate space for j and a arrays of B_oth */
5217     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5218     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5219 
5220     /* j-array */
5221     /*---------*/
5222     /*  post receives of j-array */
5223     for (i=0; i<nrecvs; i++){
5224       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5225       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5226     }
5227 
5228     /* pack the outgoing message j-array */
5229     k = 0;
5230     for (i=0; i<nsends; i++){
5231       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5232       bufJ = bufj+sstartsj[i];
5233       for (j=0; j<nrows; j++) {
5234         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5235         for (ll=0; ll<sbs; ll++){
5236           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5237           for (l=0; l<ncols; l++){
5238             *bufJ++ = cols[l];
5239           }
5240           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5241         }
5242       }
5243       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5244     }
5245 
5246     /* recvs and sends of j-array are completed */
5247     i = nrecvs;
5248     while (i--) {
5249       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5250     }
5251     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5252   } else if (scall == MAT_REUSE_MATRIX){
5253     sstartsj = *startsj_s;
5254     rstartsj = *startsj_r;
5255     bufa     = *bufa_ptr;
5256     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5257     b_otha   = b_oth->a;
5258   } else {
5259     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5260   }
5261 
5262   /* a-array */
5263   /*---------*/
5264   /*  post receives of a-array */
5265   for (i=0; i<nrecvs; i++){
5266     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5267     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5268   }
5269 
5270   /* pack the outgoing message a-array */
5271   k = 0;
5272   for (i=0; i<nsends; i++){
5273     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5274     bufA = bufa+sstartsj[i];
5275     for (j=0; j<nrows; j++) {
5276       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5277       for (ll=0; ll<sbs; ll++){
5278         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5279         for (l=0; l<ncols; l++){
5280           *bufA++ = vals[l];
5281         }
5282         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5283       }
5284     }
5285     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286   }
5287   /* recvs and sends of a-array are completed */
5288   i = nrecvs;
5289   while (i--) {
5290     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5291   }
5292   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5293   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5294 
5295   if (scall == MAT_INITIAL_MATRIX){
5296     /* put together the new matrix */
5297     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5298 
5299     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5300     /* Since these are PETSc arrays, change flags to free them as necessary. */
5301     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5302     b_oth->free_a  = PETSC_TRUE;
5303     b_oth->free_ij = PETSC_TRUE;
5304     b_oth->nonew   = 0;
5305 
5306     ierr = PetscFree(bufj);CHKERRQ(ierr);
5307     if (!startsj_s || !bufa_ptr){
5308       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5309       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the local send buffer; the caller did not ask to keep it */
5310     } else {
5311       *startsj_s = sstartsj;
5312       *startsj_r = rstartsj;
5313       *bufa_ptr  = bufa;
5314     }
5315   }
5316   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5317   PetscFunctionReturn(0);
5318 }
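/*
   Note: the j-array and a-array exchanges above follow the same nonblocking
   pattern (post all receives, pack and send, then wait).  A minimal hedged
   sketch of that pattern, with hypothetical buffers and counts (recvbuf,
   sendbuf, rstarts, sstarts, rprocs, sprocs) and the request/status variables
   used in the routine above:

     for (i=0; i<nrecvs; i++) {
       ierr = MPI_Irecv(recvbuf+rstarts[i],rstarts[i+1]-rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
     }
     for (i=0; i<nsends; i++) {
       ierr = MPI_Isend(sendbuf+sstarts[i],sstarts[i+1]-sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
     }
     i = nrecvs;
     while (i--) {ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);}
     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
*/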
5319 
5320 #undef __FUNCT__
5321 #define __FUNCT__ "MatGetCommunicationStructs"
5322 /*@C
5323   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5324 
5325   Not Collective
5326 
5327   Input Parameter:
5328 . A - The matrix in mpiaij format
5329 
5330   Output Parameters:
5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5332 . colmap - A map from global column index to local index into lvec
5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5334 
5335   Level: developer
5336 
5337 @*/
5338 #if defined (PETSC_USE_CTABLE)
5339 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5340 #else
5341 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5342 #endif
5343 {
5344   Mat_MPIAIJ *a;
5345 
5346   PetscFunctionBegin;
5347   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5348   PetscValidPointer(lvec, 2);
5349   PetscValidPointer(colmap, 3);
5350   PetscValidPointer(multScatter, 4);
5351   a = (Mat_MPIAIJ *) A->data;
5352   if (lvec) *lvec = a->lvec;
5353   if (colmap) *colmap = a->colmap;
5354   if (multScatter) *multScatter = a->Mvctx;
5355   PetscFunctionReturn(0);
5356 }
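/*
   Hedged usage sketch (not part of this source): retrieving the multiply
   communication structures of an assembled MPIAIJ matrix A; A and the local
   variable names below are hypothetical, and ierr is assumed declared.

     Vec        lvec;
     VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/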
5357 
5358 EXTERN_C_BEGIN
5359 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5360 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5361 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5362 EXTERN_C_END
5363 
5364 #undef __FUNCT__
5365 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5366 /*
5367     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5368 
5369                n                       p                          p
5370         (              )       (              )         (                  )
5371       m (      A       )  *  n (       B      )   =   m (         C        )
5372         (              )       (              )         (                  )
5373 
5374 */
5375 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5376 {
5377   PetscErrorCode     ierr;
5378   Mat                At,Bt,Ct;
5379 
5380   PetscFunctionBegin;
5381   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5382   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5383   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5384   ierr = MatDestroy(&At);CHKERRQ(ierr);
5385   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5386   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5387   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5388   PetscFunctionReturn(0);
5389 }
5390 
5391 #undef __FUNCT__
5392 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5393 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5394 {
5395   PetscErrorCode ierr;
5396   PetscInt       m=A->rmap->n,n=B->cmap->n;
5397   Mat            Cmat;
5398 
5399   PetscFunctionBegin;
5400   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5401   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5402   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5403   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5404   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5405   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5406   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5407   *C   = Cmat;
5408   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5409   PetscFunctionReturn(0);
5410 }
5411 
5412 /* ----------------------------------------------------------------*/
5413 #undef __FUNCT__
5414 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5415 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5416 {
5417   PetscErrorCode ierr;
5418 
5419   PetscFunctionBegin;
5420   if (scall == MAT_INITIAL_MATRIX){
5421     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5422   }
5423   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5424   PetscFunctionReturn(0);
5425 }
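/*
   Hedged usage sketch (not part of this source): forming C = A*B through the
   interface above, where A is MATMPIDENSE and B is MATMPIAIJ; A and B are
   assumed assembled with A->cmap->n == B->rmap->n, and ierr declared.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/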
5426 
5427 EXTERN_C_BEGIN
5428 #if defined(PETSC_HAVE_MUMPS)
5429 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5430 #endif
5431 #if defined(PETSC_HAVE_PASTIX)
5432 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5433 #endif
5434 #if defined(PETSC_HAVE_SUPERLU_DIST)
5435 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5436 #endif
5437 #if defined(PETSC_HAVE_SPOOLES)
5438 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5439 #endif
5440 EXTERN_C_END
5441 
5442 /*MC
5443    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5444 
5445    Options Database Keys:
5446 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5447 
5448   Level: beginner
5449 
5450 .seealso: MatCreateAIJ()
5451 M*/
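/*
   Hedged usage sketch (not part of this source): creating an "mpiaij" matrix
   explicitly; comm, m, n, d_nz, and o_nz are hypothetical values supplied by
   the caller, and ierr is assumed declared.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/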
5452 
5453 EXTERN_C_BEGIN
5454 #undef __FUNCT__
5455 #define __FUNCT__ "MatCreate_MPIAIJ"
5456 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5457 {
5458   Mat_MPIAIJ     *b;
5459   PetscErrorCode ierr;
5460   PetscMPIInt    size;
5461 
5462   PetscFunctionBegin;
5463   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5464 
5465   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5466   B->data         = (void*)b;
5467   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5468   B->assembled    = PETSC_FALSE;
5469 
5470   B->insertmode   = NOT_SET_VALUES;
5471   b->size         = size;
5472   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5473 
5474   /* build cache (stash) for off-processor entries set during assembly */
5475   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5476   b->donotstash  = PETSC_FALSE;
5477   b->colmap      = 0;
5478   b->garray      = 0;
5479   b->roworiented = PETSC_TRUE;
5480 
5481   /* stuff used for matrix vector multiply */
5482   b->lvec      = PETSC_NULL;
5483   b->Mvctx     = PETSC_NULL;
5484 
5485   /* stuff for MatGetRow() */
5486   b->rowindices   = 0;
5487   b->rowvalues    = 0;
5488   b->getrowactive = PETSC_FALSE;
5489 
5490 #if defined(PETSC_HAVE_SPOOLES)
5491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5492                                      "MatGetFactor_mpiaij_spooles",
5493                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5494 #endif
5495 #if defined(PETSC_HAVE_MUMPS)
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5497                                      "MatGetFactor_aij_mumps",
5498                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5499 #endif
5500 #if defined(PETSC_HAVE_PASTIX)
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5502                                      "MatGetFactor_mpiaij_pastix",
5503                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5504 #endif
5505 #if defined(PETSC_HAVE_SUPERLU_DIST)
5506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5507                                      "MatGetFactor_mpiaij_superlu_dist",
5508                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5509 #endif
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5511                                      "MatStoreValues_MPIAIJ",
5512                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5514                                      "MatRetrieveValues_MPIAIJ",
5515                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5517                                      "MatGetDiagonalBlock_MPIAIJ",
5518                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5520                                      "MatIsTranspose_MPIAIJ",
5521                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5523                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5524                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5526                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5527                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5529                                      "MatDiagonalScaleLocal_MPIAIJ",
5530                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5532                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5533                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5535                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5536                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5538                                      "MatConvert_MPIAIJ_MPISBAIJ",
5539                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5540   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5541                                      "MatMatMult_MPIDense_MPIAIJ",
5542                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5544                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5545                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5547                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5548                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 EXTERN_C_END
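/*
   Hedged sketch (not part of this source): the functions composed above are
   looked up by name at run time; for example, a conversion routine can be
   queried and called as follows, where A is an assembled MPIAIJ matrix and
   f, B2, and ierr are hypothetical local names.

     PetscErrorCode (*f)(Mat,const MatType,MatReuse,Mat*);
     Mat            B2;
     ierr = PetscObjectQueryFunction((PetscObject)A,"MatConvert_mpiaij_mpisbaij_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(A,MATMPISBAIJ,MAT_INITIAL_MATRIX,&B2);CHKERRQ(ierr);}
*/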
5553 
5554 #undef __FUNCT__
5555 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5556 /*@
5557      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5558          and "off-diagonal" part of the matrix in CSR format.
5559 
5560    Collective on MPI_Comm
5561 
5562    Input Parameters:
5563 +  comm - MPI communicator
5564 .  m - number of local rows (Cannot be PETSC_DECIDE)
5565 .  n - This value should be the same as the local size used in creating the
5566        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5567        calculated if N is given). For square matrices n is almost always m.
5568 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5569 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5570 .   i - row indices for "diagonal" portion of matrix
5571 .   j - column indices for "diagonal" portion of matrix (local, zero based)
5572 .   a - matrix values for "diagonal" portion of matrix
5573 .   oi - row indices for "off-diagonal" portion of matrix
5574 .   oj - column indices for "off-diagonal" portion of matrix (global, zero based)
5575 -   oa - matrix values for "off-diagonal" portion of matrix
5576 
5577    Output Parameter:
5578 .   mat - the matrix
5579 
5580    Level: advanced
5581 
5582    Notes:
5583        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5584        must free these arrays once the matrix has been destroyed and not before.
5585 
5586        The i and j indices are 0 based
5587 
5588        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5589 
5590        This sets local rows and cannot be used to set off-processor values.
5591 
5592        You cannot later use MatSetValues() to change values in this matrix.
5593 
5594 .keywords: matrix, aij, compressed row, sparse, parallel
5595 
5596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5597           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5598 @*/
5599 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5600 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5601 {
5602   PetscErrorCode ierr;
5603   Mat_MPIAIJ     *maij;
5604 
5605   PetscFunctionBegin;
5606   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5607   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5608   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5609   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5610   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5611   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5612   maij = (Mat_MPIAIJ*) (*mat)->data;
5613   maij->donotstash     = PETSC_TRUE;
5614   (*mat)->preallocated = PETSC_TRUE;
5615 
5616   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5617   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5618 
5619   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5620   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5621 
5622   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5623   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5624   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626 
5627   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   PetscFunctionReturn(0);
5630 }
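/*
   Hedged usage sketch (not part of this source): per the construction above,
   (i,j,a) give the local "diagonal" block in CSR form with local column
   indices, and (oi,oj,oa) give the local "off-diagonal" block with global
   column indices.  The names comm, m, n, di, dj, da, oi2, oj2, oa2, and ierr
   below are hypothetical.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           di,dj,da,oi2,oj2,oa2,&A);CHKERRQ(ierr);
     ...  use A; the six arrays must remain valid until A is destroyed ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/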
5631 
5632 /*
5633     Special version for direct calls from Fortran
5634 */
5635 #include <petsc-private/fortranimpl.h>
5636 
5637 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5638 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5639 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5640 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5641 #endif
5642 
5643 /* Change these macros so they can be used in a void function */
5644 #undef CHKERRQ
5645 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5646 #undef SETERRQ2
5647 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5648 #undef SETERRQ3
5649 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5650 #undef SETERRQ
5651 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5652 
5653 EXTERN_C_BEGIN
5654 #undef __FUNCT__
5655 #define __FUNCT__ "matsetvaluesmpiaij_"
5656 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5657 {
5658   Mat             mat = *mmat;
5659   PetscInt        m = *mm, n = *mn;
5660   InsertMode      addv = *maddv;
5661   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5662   PetscScalar     value;
5663   PetscErrorCode  ierr;
5664 
5665   MatCheckPreallocated(mat,1);
5666   if (mat->insertmode == NOT_SET_VALUES) {
5667     mat->insertmode = addv;
5668   }
5669 #if defined(PETSC_USE_DEBUG)
5670   else if (mat->insertmode != addv) {
5671     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5672   }
5673 #endif
5674   {
5675   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5676   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5677   PetscBool       roworiented = aij->roworiented;
5678 
5679   /* Some Variables required in the macro */
5680   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5681   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5682   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5683   MatScalar       *aa = a->a;
5684   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5685   Mat             B = aij->B;
5686   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5687   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5688   MatScalar       *ba = b->a;
5689 
5690   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5691   PetscInt        nonew = a->nonew;
5692   MatScalar       *ap1,*ap2;
5693 
5694   PetscFunctionBegin;
5695   for (i=0; i<m; i++) {
5696     if (im[i] < 0) continue;
5697 #if defined(PETSC_USE_DEBUG)
5698     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5699 #endif
5700     if (im[i] >= rstart && im[i] < rend) {
5701       row      = im[i] - rstart;
5702       lastcol1 = -1;
5703       rp1      = aj + ai[row];
5704       ap1      = aa + ai[row];
5705       rmax1    = aimax[row];
5706       nrow1    = ailen[row];
5707       low1     = 0;
5708       high1    = nrow1;
5709       lastcol2 = -1;
5710       rp2      = bj + bi[row];
5711       ap2      = ba + bi[row];
5712       rmax2    = bimax[row];
5713       nrow2    = bilen[row];
5714       low2     = 0;
5715       high2    = nrow2;
5716 
5717       for (j=0; j<n; j++) {
5718         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5719         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5720         if (in[j] >= cstart && in[j] < cend){
5721           col = in[j] - cstart;
5722           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5723         } else if (in[j] < 0) continue;
5724 #if defined(PETSC_USE_DEBUG)
5725         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5726 #endif
5727         else {
5728           if (mat->was_assembled) {
5729             if (!aij->colmap) {
5730               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5731             }
5732 #if defined (PETSC_USE_CTABLE)
5733             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5734             col--;
5735 #else
5736             col = aij->colmap[in[j]] - 1;
5737 #endif
5738             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5739               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5740               col =  in[j];
5741               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5742               B = aij->B;
5743               b = (Mat_SeqAIJ*)B->data;
5744               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5745               rp2      = bj + bi[row];
5746               ap2      = ba + bi[row];
5747               rmax2    = bimax[row];
5748               nrow2    = bilen[row];
5749               low2     = 0;
5750               high2    = nrow2;
5751               bm       = aij->B->rmap->n;
5752               ba = b->a;
5753             }
5754           } else col = in[j];
5755           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5756         }
5757       }
5758     } else {
5759       if (!aij->donotstash) {
5760         if (roworiented) {
5761           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5762         } else {
5763           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5764         }
5765       }
5766     }
5767   }}
5768   PetscFunctionReturnVoid();
5769 }
5770 EXTERN_C_END
5771 
5772