xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision db609ea75e24d1bf22e421c6099bd64ada4af056)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
  Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; this type also automatically switches over to use inodes when
   enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    comm  - communicator the distributed matrix lives on
    gmat  - the full sequential matrix (significant only on rank 0)
    m     - number of rows this process is to own
    reuse - MAT_INITIAL_MATRIX builds structure and values; otherwise only
            new numerical values are moved into the existing *inmat
    inmat - the resulting distributed matrix
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds the global matrix; verify it is SeqAIJ */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* rowners[] becomes the prefix sum of the local row counts, i.e. the
       global row-ownership ranges */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of this process's diagonal block */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 reads its own portion directly from the global matrix */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of this process's diagonal block */
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens[] to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash the left-of-diagonal counts for the MAT_REUSE_MATRIX path below */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* each row's values arrive ordered: ld[i] B entries left of the diagonal
       block, then the A (diagonal-block) entries, then the remaining B entries */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal B entries of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      /* gmataa was advanced during the copies; free the saved base pointer */
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each processor
330 has an order N integer array but is fast to acess.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
334 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
342 #if defined (PETSC_USE_CTABLE)
343   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
344   for (i=0; i<n; i++){
345     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
346   }
347 #else
348   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
349   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
352 #endif
353   PetscFunctionReturn(0);
354 }
355 
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) - inserts (or, with
   addv == ADD_VALUES, accumulates) one value at local row 'row', LOCAL
   column 'col' of the diagonal (A) part.  Relies on the caller
   (MatSetValues_MPIAIJ) having set up rp1/ap1 (row's column/value arrays),
   nrow1/low1/high1/lastcol1/rmax1 (search state), a, aa, ai, aj, aimax,
   ailen, nonew, ignorezeroentries, and scratch variables t, _i, ii, N.
   Binary-searches the sorted column indices; inserting a new nonzero may
   reallocate the matrix via MatSeqXAIJReallocateAIJ().
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
388 
389 
390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
391 { \
392     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
393     lastcol2 = col;\
394     while (high2-low2 > 5) { \
395       t = (low2+high2)/2; \
396       if (rp2[t] > col) high2 = t; \
397       else             low2  = t; \
398     } \
399     for (_i=low2; _i<high2; _i++) {		\
400       if (rp2[_i] > col) break;			\
401       if (rp2[_i] == col) {			      \
402 	if (addv == ADD_VALUES) ap2[_i] += value;     \
403 	else                    ap2[_i] = value;      \
404 	goto b_noinsert;			      \
405       }						      \
406     }							      \
407     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
409     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
410     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
411     N = nrow2++ - 1; b->nz++; high2++;					\
412     /* shift up all the later entries in this row */			\
413     for (ii=N; ii>=_i; ii--) {						\
414       rp2[ii+1] = rp2[ii];						\
415       ap2[ii+1] = ap2[ii];						\
416     }									\
417     rp2[_i] = col;							\
418     ap2[_i] = value;							\
419     b_noinsert: ;								\
420     bilen[row] = nrow2;							\
421 }
422 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replaces all stored values of one locally owned row; v[] must supply the
   values ordered by increasing GLOBAL column index.  The nonzero pattern is
   unchanged.  Only works for square matrices, so the ownership-range start
   can serve as the boundary between left-of-diagonal and right-of-diagonal
   off-diagonal (B) entries.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  /* convert global row index to a local one */
  row  = row - diag;
  /* l = number of B entries whose global column lies left of the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
450 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds an m-by-n logically dense block of values (rows im[],
   columns in[]).  Values for locally owned rows go directly into the
   diagonal (A) or off-diagonal (B) sequential pieces through the
   MatSetValues_SeqAIJ_[AB]_Private() macros; values for off-process rows
   are stashed and communicated during assembly.  Negative row/column
   indices are ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscBool      ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the binary-search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column in the diagonal block: store in A with a local column index */
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal column: store in B */
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not in B's pattern: expand B back to global numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash the values for communication at assembly time */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
558 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major).  Only locally
   owned rows may be requested; negative row/column indices are skipped.
   Off-diagonal entries outside B's stored pattern are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column in the diagonal block: look it up in A with a local index */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate to B's local numbering via colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* colmap stores values shifted by one, so 0 (-> col = -1) means absent */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
602 
603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
604 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Begins assembly: checks that all processes agree on the insert mode and
   starts the communication that routes stashed off-process entries to the
   owning processes.  No-op when stashing is disabled or the user promised
   no off-process entries.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* the bitwise OR yields ADD_VALUES|INSERT_VALUES exactly when modes were mixed */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
629 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash of entries received from other
   processes and inserts them locally, assembles the A (diagonal) and
   B (off-diagonal) pieces, negotiates disassembly across processes, and
   on final assembly sets up the scatter needed for MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* process every message of stashed off-process entries */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* build colmap/garray and the scatter used by MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
701 {
702   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
703   PetscErrorCode ierr;
704 
705   PetscFunctionBegin;
706   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
707   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the global rows listed in rows[] (each entry may refer to a row owned
   by any process) and, for square diagonal blocks, puts diag on the diagonal
   of each zeroed row.  If both x and b are supplied, b is first fixed up so
   that b[row] = diag*x[row] for every locally zeroed row.

   The routing of row indices to their owning processes is a hand-rolled
   gather: per-destination counts are tallied locally, the number/size of
   incoming messages is discovered with PetscMaxSum() (unless the user has
   promised no off-process rows), and the indices themselves travel through
   nonblocking MPI sends/receives.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership scan from process 0 only when rows[] is not sorted */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;       /* nprocs[2*j]   = number of indices destined for process j */
        nprocs[2*j+1] = 1;   /* nprocs[2*j+1] = flag: a message will be sent to process j */
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    /* the user has promised all zeroed rows are local, so skip the global reduction */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack each row index into the slot for its owning process; this advances
     starts[], which is why it is rebuilt just below */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first global row owned by this process; converts global to local indices below */
  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert received global rows to local */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: the seq routine can place diag directly */
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diagonal entries
       one at a time (requires new-nonzero insertion to be allowed) */
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues can be freed safely */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
876 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   Zeros both the rows AND columns of A given by the (possibly off-process)
   global indices in rows[], placing diag on the diagonal of each zeroed row.
   If x and b are given, b is adjusted for the eliminated columns so the
   remaining system keeps the solution values of x.

   Phase 1 routes the row indices to their owners with the same nonblocking
   send/receive pattern as MatZeroRows_MPIAIJ().  Phase 2 zeroes the
   diagonal block via MatZeroRowsColumns() and then clears the matching
   columns of the off-diagonal block B by scattering a 0/1 mask of the
   zeroed rows into ghost ordering.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership scan only when rows[] is not sorted */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;       /* count of indices destined for process j */
        nprocs[2*j+1] = 1;   /* flag: a message will be sent to process j */
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* packing advances starts[], which is why it is rebuilt just below */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first global row owned by this process */
  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert global rows to local */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  /* build a global 0/1 indicator of zeroed rows and scatter it into ghost
     (lvec) ordering so B's columns can be tested locally */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to correct b for the eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps
       back to the true local row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n   = ii[i+1] - ii[i];
      aj  = aij->j + ii[i];
      aa  = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* this column is being eliminated: move its contribution to b, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n   = ii[i+1] - ii[i];
      aj  = aij->j + ii[i];
      aa  = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues can be freed safely */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMult_MPIAIJ"
1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       nt;
1084 
1085   PetscFunctionBegin;
1086   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1087   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1088   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1091   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128   PetscBool      merged;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1132   /* do nondiagonal part */
1133   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1134   if (!merged) {
1135     /* send it on its way */
1136     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137     /* do local part */
1138     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1139     /* receive remote parts: note this assumes the values are not actually */
1140     /* added in yy until the next line, */
1141     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   } else {
1143     /* do local part */
1144     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1145     /* send it on its way */
1146     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147     /* values actually were received in the Begin() but we need to call this nop */
1148     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals the transpose of Amat (to tolerance tol).
   First runs the cheap local test on the diagonal blocks; only if that
   passes (and there is more than one process) does it extract the
   off-diagonal pieces with MatGetSubMatrices() and compare them.
*/
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  /* NOTE(review): the loop bound uses M (global rows) while notme was sized
     with N (global columns); for rectangular matrices these differ — verify
     whether this routine is only intended for square matrices */
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* A's rows-I-own x columns-I-don't vs B's rows-I-don't x columns-I-own */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
   PetscFunctionReturn(0);
}
EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   /* do nondiagonal part */
1207   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1208   /* send it on its way */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   /* do local part */
1211   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1212   /* receive remote parts */
1213   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /*
1218   This only works correctly for square matrices where the subblock A->A is the
1219    diagonal block
1220 */
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1224 {
1225   PetscErrorCode ierr;
1226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1227 
1228   PetscFunctionBegin;
1229   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1230   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1231   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatScale_MPIAIJ"
1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1238 {
1239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1244   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by a parallel AIJ matrix: the stash, the two
   sequential subblocks, the column map, the ghost work vector and scatter,
   row-access scratch arrays, and the Mat_MPIAIJ structure itself, then
   detaches the composed functions that were registered at creation time.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* the global-to-local column map uses either a hash table or a dense array */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  /* scratch space used by MatGetRow() */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the type-specific methods composed on this object at creation */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode    ierr;
1295   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1296   int               fd;
1297   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1299   PetscScalar       *column_values;
1300   PetscInt          message_count,flowcontrolcount;
1301 
1302   PetscFunctionBegin;
1303   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1304   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1305   nz   = A->nz + B->nz;
1306   if (!rank) {
1307     header[0] = MAT_FILE_CLASSID;
1308     header[1] = mat->rmap->N;
1309     header[2] = mat->cmap->N;
1310     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1311     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1312     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     /* get largest number of rows any processor has */
1314     rlen = mat->rmap->n;
1315     range = mat->rmap->range;
1316     for (i=1; i<size; i++) {
1317       rlen = PetscMax(rlen,range[i+1] - range[i]);
1318     }
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) {
1327     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1328   }
1329 
1330   /* store the row lengths to the file */
1331   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1332   if (!rank) {
1333     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     for (i=1; i<size; i++) {
1335       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1336       rlen = range[i+1] - range[i];
1337       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1338       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     }
1340     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1343     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1347 
1348   /* load up the local column indices */
1349   nzmax = nz; /* )th processor needs space a largest processor needs */
1350   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1351   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1352   cnt  = 0;
1353   for (i=0; i<mat->rmap->n; i++) {
1354     for (j=B->i[i]; j<B->i[i+1]; j++) {
1355       if ( (col = garray[B->j[j]]) > cstart) break;
1356       column_indices[cnt++] = col;
1357     }
1358     for (k=A->i[i]; k<A->i[i+1]; k++) {
1359       column_indices[cnt++] = A->j[k] + cstart;
1360     }
1361     for (; j<B->i[i+1]; j++) {
1362       column_indices[cnt++] = garray[B->j[j]];
1363     }
1364   }
1365   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1366 
1367   /* store the column indices to the file */
1368    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1369   if (!rank) {
1370     MPI_Status status;
1371     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     for (i=1; i<size; i++) {
1373       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1374       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1375       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1376       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1378     }
1379      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1380   } else {
1381     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1382     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1385   }
1386   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1387 
1388   /* load up the local column values */
1389   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1390   cnt  = 0;
1391   for (i=0; i<mat->rmap->n; i++) {
1392     for (j=B->i[i]; j<B->i[i+1]; j++) {
1393       if ( garray[B->j[j]] > cstart) break;
1394       column_values[cnt++] = B->a[j];
1395     }
1396     for (k=A->i[i]; k<A->i[i+1]; k++) {
1397       column_values[cnt++] = A->a[k];
1398     }
1399     for (; j<B->i[i+1]; j++) {
1400       column_values[cnt++] = B->a[j];
1401     }
1402   }
1403   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1404 
1405   /* store the column values to the file */
1406    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1407   if (!rank) {
1408     MPI_Status status;
1409     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1410     for (i=1; i<size; i++) {
1411        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1412       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1413       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1414       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1415       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1418   } else {
1419     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1420     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1421     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1423   }
1424   ierr = PetscFree(column_values);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1431 {
1432   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1433   PetscErrorCode    ierr;
1434   PetscMPIInt       rank = aij->rank,size = aij->size;
1435   PetscBool         isdraw,iascii,isbinary;
1436   PetscViewer       sviewer;
1437   PetscViewerFormat format;
1438 
1439   PetscFunctionBegin;
1440   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1441   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1442   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1443   if (iascii) {
1444     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1445     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1446       MatInfo    info;
1447       PetscBool  inodes;
1448 
1449       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1450       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1451       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1452       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1453       if (!inodes) {
1454         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1455 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1456       } else {
1457         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1458 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1459       }
1460       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1462       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1464       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1467       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1468       PetscFunctionReturn(0);
1469     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1470       PetscInt   inodecount,inodelimit,*inodes;
1471       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1472       if (inodes) {
1473         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1474       } else {
1475         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1476       }
1477       PetscFunctionReturn(0);
1478     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1479       PetscFunctionReturn(0);
1480     }
1481   } else if (isbinary) {
1482     if (size == 1) {
1483       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1484       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1485     } else {
1486       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1487     }
1488     PetscFunctionReturn(0);
1489   } else if (isdraw) {
1490     PetscDraw  draw;
1491     PetscBool  isnull;
1492     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1493     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1494   }
1495 
1496   if (size == 1) {
1497     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499   } else {
1500     /* assemble the entire matrix onto first processor. */
1501     Mat         A;
1502     Mat_SeqAIJ  *Aloc;
1503     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1504     MatScalar   *a;
1505 
1506     if (mat->rmap->N > 1024) {
1507       PetscBool  flg = PETSC_FALSE;
1508 
1509       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1510       if (!flg) {
1511         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1512       }
1513     }
1514 
1515     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1516     if (!rank) {
1517       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1518     } else {
1519       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1520     }
1521     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1522     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1523     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1524     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1525     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1526 
1527     /* copy over the A part */
1528     Aloc = (Mat_SeqAIJ*)aij->A->data;
1529     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1530     row = mat->rmap->rstart;
1531     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1532     for (i=0; i<m; i++) {
1533       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1534       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1535     }
1536     aj = Aloc->j;
1537     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1538 
1539     /* copy over the B part */
1540     Aloc = (Mat_SeqAIJ*)aij->B->data;
1541     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1544     ct   = cols;
1545     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1546     for (i=0; i<m; i++) {
1547       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1548       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1549     }
1550     ierr = PetscFree(ct);CHKERRQ(ierr);
1551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     /*
1554        Everyone has to call to draw the matrix since the graphics waits are
1555        synchronized across all processors that share the PetscDraw object
1556     */
1557     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1558     if (!rank) {
1559       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1560       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1561       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1562       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1563     }
1564     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1565     ierr = MatDestroy(&A);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatView_MPIAIJ"
1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1573 {
1574   PetscErrorCode ierr;
1575   PetscBool      iascii,isdraw,issocket,isbinary;
1576 
1577   PetscFunctionBegin;
1578   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1579   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1580   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1581   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1582   if (iascii || isdraw || isbinary || issocket) {
1583     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1584   } else {
1585     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSOR_MPIAIJ"
1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1593 {
1594   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1595   PetscErrorCode ierr;
1596   Vec            bb1 = 0;
1597   PetscBool      hasop;
1598 
1599   PetscFunctionBegin;
1600   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1601     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1602   }
1603 
1604   if (flag == SOR_APPLY_UPPER) {
1605     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1606     PetscFunctionReturn(0);
1607   }
1608 
1609   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1610     if (flag & SOR_ZERO_INITIAL_GUESS) {
1611       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1612       its--;
1613     }
1614 
1615     while (its--) {
1616       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618 
1619       /* update rhs: bb1 = bb - B*x */
1620       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1621       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1622 
1623       /* local sweep */
1624       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1625     }
1626   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1627     if (flag & SOR_ZERO_INITIAL_GUESS) {
1628       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1629       its--;
1630     }
1631     while (its--) {
1632       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634 
1635       /* update rhs: bb1 = bb - B*x */
1636       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1637       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1638 
1639       /* local sweep */
1640       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1641     }
1642   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1643     if (flag & SOR_ZERO_INITIAL_GUESS) {
1644       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1645       its--;
1646     }
1647     while (its--) {
1648       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1649       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650 
1651       /* update rhs: bb1 = bb - B*x */
1652       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1653       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1654 
1655       /* local sweep */
1656       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1657     }
1658   }  else if (flag & SOR_EISENSTAT) {
1659     Vec         xx1;
1660 
1661     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1662     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1663 
1664     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     if (!mat->diag) {
1667       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1668       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1669     }
1670     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1671     if (hasop) {
1672       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1673     } else {
1674       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1675     }
1676     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1677 
1678     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1679 
1680     /* local sweep */
1681     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1682     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1683     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1684   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1685 
1686   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatPermute_MPIAIJ"
1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1693 {
1694   MPI_Comm       comm;
1695   PetscInt       first,local_rowsize,local_colsize;
1696   const PetscInt *rows;
1697   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1698   PetscErrorCode ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1702   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1703   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1708   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1709   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1710   /* get the local target indices */
1711   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1712   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1713   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1714   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1715   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1716   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1717   /* the column permutation is so much easier;
1718      make a local version of 'colp' and invert it */
1719   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1720   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1721   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1722   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1723   /* now we just get the submatrix */
1724   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1725   /* clean up */
1726   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1727   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1734 {
1735   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1736   Mat            A = mat->A,B = mat->B;
1737   PetscErrorCode ierr;
1738   PetscReal      isend[5],irecv[5];
1739 
1740   PetscFunctionBegin;
1741   info->block_size     = 1.0;
1742   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1743   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1744   isend[3] = info->memory;  isend[4] = info->mallocs;
1745   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1746   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1747   isend[3] += info->memory;  isend[4] += info->mallocs;
1748   if (flag == MAT_LOCAL) {
1749     info->nz_used      = isend[0];
1750     info->nz_allocated = isend[1];
1751     info->nz_unneeded  = isend[2];
1752     info->memory       = isend[3];
1753     info->mallocs      = isend[4];
1754   } else if (flag == MAT_GLOBAL_MAX) {
1755     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1756     info->nz_used      = irecv[0];
1757     info->nz_allocated = irecv[1];
1758     info->nz_unneeded  = irecv[2];
1759     info->memory       = irecv[3];
1760     info->mallocs      = irecv[4];
1761   } else if (flag == MAT_GLOBAL_SUM) {
1762     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1763     info->nz_used      = irecv[0];
1764     info->nz_allocated = irecv[1];
1765     info->nz_unneeded  = irecv[2];
1766     info->memory       = irecv[3];
1767     info->mallocs      = irecv[4];
1768   }
1769   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1770   info->fill_ratio_needed = 0;
1771   info->factor_mallocs    = 0;
1772 
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatSetOption_MPIAIJ"
1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1779 {
1780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1781   PetscErrorCode ierr;
1782 
1783   PetscFunctionBegin;
1784   switch (op) {
1785   case MAT_NEW_NONZERO_LOCATIONS:
1786   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1787   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1788   case MAT_KEEP_NONZERO_PATTERN:
1789   case MAT_NEW_NONZERO_LOCATION_ERR:
1790   case MAT_USE_INODES:
1791   case MAT_IGNORE_ZERO_ENTRIES:
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_ROW_ORIENTED:
1796     a->roworiented = flg;
1797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1798     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1799     break;
1800   case MAT_NEW_DIAGONALS:
1801     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1802     break;
1803   case MAT_IGNORE_OFF_PROC_ENTRIES:
1804     a->donotstash = flg;
1805     break;
1806   case MAT_SPD:
1807     A->spd_set                         = PETSC_TRUE;
1808     A->spd                             = flg;
1809     if (flg) {
1810       A->symmetric                     = PETSC_TRUE;
1811       A->structurally_symmetric        = PETSC_TRUE;
1812       A->symmetric_set                 = PETSC_TRUE;
1813       A->structurally_symmetric_set    = PETSC_TRUE;
1814     }
1815     break;
1816   case MAT_SYMMETRIC:
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_STRUCTURALLY_SYMMETRIC:
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_HERMITIAN:
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_SYMMETRY_ETERNAL:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   default:
1829     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1830   }
1831   PetscFunctionReturn(0);
1832 }
1833 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix,
   merging the diagonal (A) and off-diagonal (B) block entries into a single
   array sorted by global column index.

   Only rows in [rstart,rend) owned by this process may be requested; the
   returned idx/v arrays point into per-matrix scratch buffers, so only one
   row may be "gotten" at a time (enforced via mat->getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    /* longest combined row = max over rows of (A row length + B row length) */
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps B's compressed local column indices to global column indices */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the
         diagonal block; layout is  B[0..imark) | A[0..nzA) | B[imark..nzB) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested; determine imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's local columns become global by offsetting with cstart */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* hand the block-local work arrays back immediately; the merged copies live on */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1917 {
1918   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1919 
1920   PetscFunctionBegin;
1921   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1922   aij->getrowactive = PETSC_FALSE;
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatNorm_MPIAIJ"
1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1929 {
1930   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1931   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1932   PetscErrorCode ierr;
1933   PetscInt       i,j,cstart = mat->cmap->rstart;
1934   PetscReal      sum = 0.0;
1935   MatScalar      *v;
1936 
1937   PetscFunctionBegin;
1938   if (aij->size == 1) {
1939     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1940   } else {
1941     if (type == NORM_FROBENIUS) {
1942       v = amat->a;
1943       for (i=0; i<amat->nz; i++) {
1944 #if defined(PETSC_USE_COMPLEX)
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946 #else
1947         sum += (*v)*(*v); v++;
1948 #endif
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952 #if defined(PETSC_USE_COMPLEX)
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954 #else
1955         sum += (*v)*(*v); v++;
1956 #endif
1957       }
1958       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960     } else if (type == NORM_1) { /* max column norm */
1961       PetscReal *tmp,*tmp2;
1962       PetscInt  *jj,*garray = aij->garray;
1963       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1964       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1965       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981     } else if (type == NORM_INFINITY) { /* max row norm */
1982       PetscReal ntemp = 0.0;
1983       for (j=0; j<aij->A->rmap->n; j++) {
1984         v = amat->a + amat->i[j];
1985         sum = 0.0;
1986         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         v = bmat->a + bmat->i[j];
1990         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1991           sum += PetscAbsScalar(*v); v++;
1992         }
1993         if (sum > ntemp) ntemp = sum;
1994       }
1995       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1996     } else {
1997       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1998     }
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix by
   inserting each local row of A (diagonal block) and B (off-diagonal block)
   as a column of the result via MatSetValues(); the stash machinery routes
   off-process entries to their owners during assembly.

   reuse == MAT_INITIAL_MATRIX creates *matout; in-place (A == *matout)
   is only allowed for square matrices and is finished with MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    /* d_nnz[c] = number of entries in local column c of A = row length of column c in A^T */
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    /* o_nnz is only an estimate, so extra mallocs during insertion must be tolerated */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  /* each local row of A is inserted as one (global) column of B */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  /* Aloc->j was shifted to global indices above; undo that so A is unchanged on return */
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  /* B's compressed column indices are translated to global indices through garray
     into a temporary array, leaving Bloc->j untouched */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: steal B's innards into A's header */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2078 {
2079   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2080   Mat            a = aij->A,b = aij->B;
2081   PetscErrorCode ierr;
2082   PetscInt       s1,s2,s3;
2083 
2084   PetscFunctionBegin;
2085   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2086   if (rr) {
2087     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2088     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2089     /* Overlap communication with computation. */
2090     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2091   }
2092   if (ll) {
2093     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2094     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2095     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2096   }
2097   /* scale  the diagonal block */
2098   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2099 
2100   if (rr) {
2101     /* Do a scatter end and then right scale the off-diagonal block */
2102     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2104   }
2105 
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 #undef __FUNCT__
2110 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2111 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2112 {
2113   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatEqual_MPIAIJ"
2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2124 {
2125   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2126   Mat            a,b,c,d;
2127   PetscBool      flg;
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   a = matA->A; b = matA->B;
2132   c = matB->A; d = matB->B;
2133 
2134   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2135   if (flg) {
2136     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2137   }
2138   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatCopy_MPIAIJ"
2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2145 {
2146   PetscErrorCode ierr;
2147   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2148   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2149 
2150   PetscFunctionBegin;
2151   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2152   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2153     /* because of the column compression in the off-processor part of the matrix a->B,
2154        the number of columns in a->B and b->B may be different, hence we cannot call
2155        the MatCopy() directly on the two parts. If need be, we can provide a more
2156        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2157        then copying the submatrices */
2158     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2159   } else {
2160     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2161     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
/*
   MatSetUp_MPIAIJ - Finalizes matrix setup when the user never called a
   preallocation routine, by applying the default MPIAIJ preallocation
   (PETSC_DEFAULT nonzeros per row for both blocks).
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatAXPY_MPIAIJ"
2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2180 {
2181   PetscErrorCode ierr;
2182   PetscInt       i;
2183   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2184   PetscBLASInt   bnz,one=1;
2185   Mat_SeqAIJ     *x,*y;
2186 
2187   PetscFunctionBegin;
2188   if (str == SAME_NONZERO_PATTERN) {
2189     PetscScalar alpha = a;
2190     x = (Mat_SeqAIJ *)xx->A->data;
2191     y = (Mat_SeqAIJ *)yy->A->data;
2192     bnz = PetscBLASIntCast(x->nz);
2193     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2194     x = (Mat_SeqAIJ *)xx->B->data;
2195     y = (Mat_SeqAIJ *)yy->B->data;
2196     bnz = PetscBLASIntCast(x->nz);
2197     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2198   } else if (str == SUBSET_NONZERO_PATTERN) {
2199     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2200 
2201     x = (Mat_SeqAIJ *)xx->B->data;
2202     y = (Mat_SeqAIJ *)yy->B->data;
2203     if (y->xtoy && y->XtoY != xx->B) {
2204       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2205       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2206     }
2207     if (!y->xtoy) { /* get xtoy */
2208       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2209       y->XtoY = xx->B;
2210       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2211     }
2212     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2213   } else {
2214     Mat B;
2215     PetscInt *nnz_d,*nnz_o;
2216     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2217     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2218     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2219     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2220     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2221     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2222     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2223     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2224     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2225     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2226     ierr = MatHeaderReplace(Y,B);
2227     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2228     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2234 
#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/*
   MatConjugate_MPIAIJ - Replaces the matrix entries by their complex
   conjugates by conjugating both the diagonal (aij->A) and off-diagonal
   (aij->B) sequential blocks.  In real builds (PETSC_USE_COMPLEX undefined)
   conjugation is the identity, so this is a no-op.
*/
PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2251 
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
/*
   MatRealPart_MPIAIJ - Replaces every entry by its real part, delegating to
   MatRealPart() on the diagonal (a->A) and off-diagonal (a->B) blocks.
*/
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2264 
#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
/*
   MatImaginaryPart_MPIAIJ - Replaces every entry by its imaginary part,
   delegating to MatImaginaryPart() on the diagonal (a->A) and off-diagonal
   (a->B) blocks.
*/
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2277 
2278 #ifdef PETSC_HAVE_PBGL
2279 
2280 #include <boost/parallel/mpi/bsp_process_group.hpp>
2281 #include <boost/graph/distributed/ilu_default_graph.hpp>
2282 #include <boost/graph/distributed/ilu_0_block.hpp>
2283 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2284 #include <boost/graph/distributed/petsc/interface.hpp>
2285 #include <boost/multi_array.hpp>
2286 #include <boost/parallel/distributed_property_map->hpp>
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2290 /*
2291   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2292 */
2293 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2294 {
2295   namespace petsc = boost::distributed::petsc;
2296 
2297   namespace graph_dist = boost::graph::distributed;
2298   using boost::graph::distributed::ilu_default::process_group_type;
2299   using boost::graph::ilu_permuted;
2300 
2301   PetscBool       row_identity, col_identity;
2302   PetscContainer  c;
2303   PetscInt        m, n, M, N;
2304   PetscErrorCode  ierr;
2305 
2306   PetscFunctionBegin;
2307   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2308   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2309   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2310   if (!row_identity || !col_identity) {
2311     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2312   }
2313 
2314   process_group_type pg;
2315   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2316   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2317   lgraph_type&   level_graph = *lgraph_p;
2318   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2319 
2320   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2321   ilu_permuted(level_graph);
2322 
2323   /* put together the new matrix */
2324   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2325   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2326   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2327   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2328   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2329   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2330   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331 
2332   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2333   ierr = PetscContainerSetPointer(c, lgraph_p);
2334   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2335   ierr = PetscContainerDestroy(&c);
2336   PetscFunctionReturn(0);
2337 }
2338 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   MatLUFactorNumeric_MPIAIJ - Intentionally empty stub for the PBGL path.
   Presumably the numeric work is already done inside
   MatILUFactorSymbolic_MPIAIJ() (which calls ilu_permuted() on the level
   graph) -- TODO confirm; nothing is computed here.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatSolve_MPIAIJ"
2349 /*
2350   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2351 */
2352 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2353 {
2354   namespace graph_dist = boost::graph::distributed;
2355 
2356   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2357   lgraph_type*   lgraph_p;
2358   PetscContainer c;
2359   PetscErrorCode ierr;
2360 
2361   PetscFunctionBegin;
2362   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2363   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2364   ierr = VecCopy(b, x);CHKERRQ(ierr);
2365 
2366   PetscScalar* array_x;
2367   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2368   PetscInt sx;
2369   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2370 
2371   PetscScalar* array_b;
2372   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2373   PetscInt sb;
2374   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2375 
2376   lgraph_type&   level_graph = *lgraph_p;
2377   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2378 
2379   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2380   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2381                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2382 
2383   typedef boost::iterator_property_map<array_ref_type::iterator,
2384                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2385   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2386                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2387 
2388   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2389 
2390   PetscFunctionReturn(0);
2391 }
2392 #endif
2393 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;            /* local nonzero count of mat; number of sends/receives set up */
  PetscMPIInt    *send_rank,*recv_rank;            /* ranks this process sends rows to / receives rows from */
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; /* send/recv nonzero counts; send/recv row-pointer+column buffers */
  PetscScalar    *sbuf_a,**rbuf_a;                 /* send buffer of values; one recv value buffer per sender */
  PetscErrorCode (*Destroy)(Mat);                  /* the matrix's original destroy routine, restored on destroy */
} Mat_Redundant;
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2404 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2405 {
2406   PetscErrorCode       ierr;
2407   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2408   PetscInt             i;
2409 
2410   PetscFunctionBegin;
2411   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2412   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2413   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2414   for (i=0; i<redund->nrecvs; i++){
2415     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2416     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2417   }
2418   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2419   ierr = PetscFree(redund);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatDestroy_MatRedundant"
2425 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2426 {
2427   PetscErrorCode  ierr;
2428   PetscContainer  container;
2429   Mat_Redundant   *redund=PETSC_NULL;
2430 
2431   PetscFunctionBegin;
2432   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2433   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2434   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2435   A->ops->destroy = redund->Destroy;
2436   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2437   if (A->ops->destroy) {
2438     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2439   }
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2445 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2446 {
2447   PetscMPIInt    rank,size;
2448   MPI_Comm       comm=((PetscObject)mat)->comm;
2449   PetscErrorCode ierr;
2450   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2451   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2452   PetscInt       *rowrange=mat->rmap->range;
2453   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2454   Mat            A=aij->A,B=aij->B,C=*matredundant;
2455   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2456   PetscScalar    *sbuf_a;
2457   PetscInt       nzlocal=a->nz+b->nz;
2458   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2459   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2460   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2461   MatScalar      *aworkA,*aworkB;
2462   PetscScalar    *vals;
2463   PetscMPIInt    tag1,tag2,tag3,imdex;
2464   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2465                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2466   MPI_Status     recv_status,*send_status;
2467   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2468   PetscInt       **rbuf_j=PETSC_NULL;
2469   PetscScalar    **rbuf_a=PETSC_NULL;
2470   Mat_Redundant  *redund=PETSC_NULL;
2471   PetscContainer container;
2472 
2473   PetscFunctionBegin;
2474   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2476 
2477   if (reuse == MAT_REUSE_MATRIX) {
2478     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2479     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2480     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2481     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2482     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2483     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2484     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2485     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2486 
2487     nsends    = redund->nsends;
2488     nrecvs    = redund->nrecvs;
2489     send_rank = redund->send_rank;
2490     recv_rank = redund->recv_rank;
2491     sbuf_nz   = redund->sbuf_nz;
2492     rbuf_nz   = redund->rbuf_nz;
2493     sbuf_j    = redund->sbuf_j;
2494     sbuf_a    = redund->sbuf_a;
2495     rbuf_j    = redund->rbuf_j;
2496     rbuf_a    = redund->rbuf_a;
2497   }
2498 
2499   if (reuse == MAT_INITIAL_MATRIX){
2500     PetscMPIInt  subrank,subsize;
2501     PetscInt     nleftover,np_subcomm;
2502     /* get the destination processors' id send_rank, nsends and nrecvs */
2503     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2504     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2505     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2506     np_subcomm = size/nsubcomm;
2507     nleftover  = size - nsubcomm*np_subcomm;
2508     nsends = 0; nrecvs = 0;
2509     for (i=0; i<size; i++){ /* i=rank*/
2510       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2511         send_rank[nsends] = i; nsends++;
2512         recv_rank[nrecvs++] = i;
2513       }
2514     }
2515     if (rank >= size - nleftover){/* this proc is a leftover processor */
2516       i = size-nleftover-1;
2517       j = 0;
2518       while (j < nsubcomm - nleftover){
2519         send_rank[nsends++] = i;
2520         i--; j++;
2521       }
2522     }
2523 
2524     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2525       for (i=0; i<nleftover; i++){
2526         recv_rank[nrecvs++] = size-nleftover+i;
2527       }
2528     }
2529 
2530     /* allocate sbuf_j, sbuf_a */
2531     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2532     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2533     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2534   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2535 
2536   /* copy mat's local entries into the buffers */
2537   if (reuse == MAT_INITIAL_MATRIX){
2538     rownz_max = 0;
2539     rptr = sbuf_j;
2540     cols = sbuf_j + rend-rstart + 1;
2541     vals = sbuf_a;
2542     rptr[0] = 0;
2543     for (i=0; i<rend-rstart; i++){
2544       row = i + rstart;
2545       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2546       ncols  = nzA + nzB;
2547       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2548       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2549       /* load the column indices for this row into cols */
2550       lwrite = 0;
2551       for (l=0; l<nzB; l++) {
2552         if ((ctmp = bmap[cworkB[l]]) < cstart){
2553           vals[lwrite]   = aworkB[l];
2554           cols[lwrite++] = ctmp;
2555         }
2556       }
2557       for (l=0; l<nzA; l++){
2558         vals[lwrite]   = aworkA[l];
2559         cols[lwrite++] = cstart + cworkA[l];
2560       }
2561       for (l=0; l<nzB; l++) {
2562         if ((ctmp = bmap[cworkB[l]]) >= cend){
2563           vals[lwrite]   = aworkB[l];
2564           cols[lwrite++] = ctmp;
2565         }
2566       }
2567       vals += ncols;
2568       cols += ncols;
2569       rptr[i+1] = rptr[i] + ncols;
2570       if (rownz_max < ncols) rownz_max = ncols;
2571     }
2572     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2573   } else { /* only copy matrix values into sbuf_a */
2574     rptr = sbuf_j;
2575     vals = sbuf_a;
2576     rptr[0] = 0;
2577     for (i=0; i<rend-rstart; i++){
2578       row = i + rstart;
2579       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2580       ncols  = nzA + nzB;
2581       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2582       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2583       lwrite = 0;
2584       for (l=0; l<nzB; l++) {
2585         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2586       }
2587       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2588       for (l=0; l<nzB; l++) {
2589         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2590       }
2591       vals += ncols;
2592       rptr[i+1] = rptr[i] + ncols;
2593     }
2594   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2595 
2596   /* send nzlocal to others, and recv other's nzlocal */
2597   /*--------------------------------------------------*/
2598   if (reuse == MAT_INITIAL_MATRIX){
2599     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2600     s_waits2 = s_waits3 + nsends;
2601     s_waits1 = s_waits2 + nsends;
2602     r_waits1 = s_waits1 + nsends;
2603     r_waits2 = r_waits1 + nrecvs;
2604     r_waits3 = r_waits2 + nrecvs;
2605   } else {
2606     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2607     r_waits3 = s_waits3 + nsends;
2608   }
2609 
2610   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2611   if (reuse == MAT_INITIAL_MATRIX){
2612     /* get new tags to keep the communication clean */
2613     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2614     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2615     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2616 
2617     /* post receives of other's nzlocal */
2618     for (i=0; i<nrecvs; i++){
2619       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2620     }
2621     /* send nzlocal to others */
2622     for (i=0; i<nsends; i++){
2623       sbuf_nz[i] = nzlocal;
2624       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2625     }
2626     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2627     count = nrecvs;
2628     while (count) {
2629       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2630       recv_rank[imdex] = recv_status.MPI_SOURCE;
2631       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2632       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2633 
2634       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2635       rbuf_nz[imdex] += i + 2;
2636       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2637       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2638       count--;
2639     }
2640     /* wait on sends of nzlocal */
2641     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2642     /* send mat->i,j to others, and recv from other's */
2643     /*------------------------------------------------*/
2644     for (i=0; i<nsends; i++){
2645       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2646       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2647     }
2648     /* wait on receives of mat->i,j */
2649     /*------------------------------*/
2650     count = nrecvs;
2651     while (count) {
2652       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2653       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2654       count--;
2655     }
2656     /* wait on sends of mat->i,j */
2657     /*---------------------------*/
2658     if (nsends) {
2659       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2660     }
2661   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2662 
2663   /* post receives, send and receive mat->a */
2664   /*----------------------------------------*/
2665   for (imdex=0; imdex<nrecvs; imdex++) {
2666     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2667   }
2668   for (i=0; i<nsends; i++){
2669     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2670   }
2671   count = nrecvs;
2672   while (count) {
2673     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2674     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2675     count--;
2676   }
2677   if (nsends) {
2678     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2679   }
2680 
2681   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2682 
2683   /* create redundant matrix */
2684   /*-------------------------*/
2685   if (reuse == MAT_INITIAL_MATRIX){
2686     /* compute rownz_max for preallocation */
2687     for (imdex=0; imdex<nrecvs; imdex++){
2688       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2689       rptr = rbuf_j[imdex];
2690       for (i=0; i<j; i++){
2691         ncols = rptr[i+1] - rptr[i];
2692         if (rownz_max < ncols) rownz_max = ncols;
2693       }
2694     }
2695 
2696     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2697     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2698     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2699     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2700     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2701   } else {
2702     C = *matredundant;
2703   }
2704 
2705   /* insert local matrix entries */
2706   rptr = sbuf_j;
2707   cols = sbuf_j + rend-rstart + 1;
2708   vals = sbuf_a;
2709   for (i=0; i<rend-rstart; i++){
2710     row   = i + rstart;
2711     ncols = rptr[i+1] - rptr[i];
2712     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2713     vals += ncols;
2714     cols += ncols;
2715   }
2716   /* insert received matrix entries */
2717   for (imdex=0; imdex<nrecvs; imdex++){
2718     rstart = rowrange[recv_rank[imdex]];
2719     rend   = rowrange[recv_rank[imdex]+1];
2720     rptr = rbuf_j[imdex];
2721     cols = rbuf_j[imdex] + rend-rstart + 1;
2722     vals = rbuf_a[imdex];
2723     for (i=0; i<rend-rstart; i++){
2724       row   = i + rstart;
2725       ncols = rptr[i+1] - rptr[i];
2726       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2727       vals += ncols;
2728       cols += ncols;
2729     }
2730   }
2731   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2732   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2733   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2734   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2735   if (reuse == MAT_INITIAL_MATRIX) {
2736     PetscContainer container;
2737     *matredundant = C;
2738     /* create a supporting struct and attach it to C for reuse */
2739     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2740     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2741     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2742     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2743     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2744     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2745 
2746     redund->nzlocal = nzlocal;
2747     redund->nsends  = nsends;
2748     redund->nrecvs  = nrecvs;
2749     redund->send_rank = send_rank;
2750     redund->recv_rank = recv_rank;
2751     redund->sbuf_nz = sbuf_nz;
2752     redund->rbuf_nz = rbuf_nz;
2753     redund->sbuf_j  = sbuf_j;
2754     redund->sbuf_a  = sbuf_a;
2755     redund->rbuf_j  = rbuf_j;
2756     redund->rbuf_a  = rbuf_a;
2757 
2758     redund->Destroy = C->ops->destroy;
2759     C->ops->destroy = MatDestroy_MatRedundant;
2760   }
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2766 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2767 {
2768   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2769   PetscErrorCode ierr;
2770   PetscInt       i,*idxb = 0;
2771   PetscScalar    *va,*vb;
2772   Vec            vtmp;
2773 
2774   PetscFunctionBegin;
2775   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2776   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2777   if (idx) {
2778     for (i=0; i<A->rmap->n; i++) {
2779       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2780     }
2781   }
2782 
2783   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2784   if (idx) {
2785     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2786   }
2787   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2788   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2789 
2790   for (i=0; i<A->rmap->n; i++){
2791     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2792       va[i] = vb[i];
2793       if (idx) idx[i] = a->garray[idxb[i]];
2794     }
2795   }
2796 
2797   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2798   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2799   ierr = PetscFree(idxb);CHKERRQ(ierr);
2800   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2806 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2807 {
2808   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2809   PetscErrorCode ierr;
2810   PetscInt       i,*idxb = 0;
2811   PetscScalar    *va,*vb;
2812   Vec            vtmp;
2813 
2814   PetscFunctionBegin;
2815   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2816   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2817   if (idx) {
2818     for (i=0; i<A->cmap->n; i++) {
2819       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2820     }
2821   }
2822 
2823   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2824   if (idx) {
2825     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2826   }
2827   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2828   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2829 
2830   for (i=0; i<A->rmap->n; i++){
2831     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2832       va[i] = vb[i];
2833       if (idx) idx[i] = a->garray[idxb[i]];
2834     }
2835   }
2836 
2837   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2838   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2839   ierr = PetscFree(idxb);CHKERRQ(ierr);
2840   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 #undef __FUNCT__
2845 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2846 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2847 {
2848   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2849   PetscInt       n      = A->rmap->n;
2850   PetscInt       cstart = A->cmap->rstart;
2851   PetscInt      *cmap   = mat->garray;
2852   PetscInt      *diagIdx, *offdiagIdx;
2853   Vec            diagV, offdiagV;
2854   PetscScalar   *a, *diagA, *offdiagA;
2855   PetscInt       r;
2856   PetscErrorCode ierr;
2857 
2858   PetscFunctionBegin;
2859   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2860   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2861   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2862   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2863   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2864   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2865   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2866   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2867   for(r = 0; r < n; ++r) {
2868     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2869       a[r]   = diagA[r];
2870       idx[r] = cstart + diagIdx[r];
2871     } else {
2872       a[r]   = offdiagA[r];
2873       idx[r] = cmap[offdiagIdx[r]];
2874     }
2875   }
2876   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2877   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2878   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2879   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2880   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2881   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2887 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2888 {
2889   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2890   PetscInt       n      = A->rmap->n;
2891   PetscInt       cstart = A->cmap->rstart;
2892   PetscInt      *cmap   = mat->garray;
2893   PetscInt      *diagIdx, *offdiagIdx;
2894   Vec            diagV, offdiagV;
2895   PetscScalar   *a, *diagA, *offdiagA;
2896   PetscInt       r;
2897   PetscErrorCode ierr;
2898 
2899   PetscFunctionBegin;
2900   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2901   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2902   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2903   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2904   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2905   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2906   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2907   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2908   for(r = 0; r < n; ++r) {
2909     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2910       a[r]   = diagA[r];
2911       idx[r] = cstart + diagIdx[r];
2912     } else {
2913       a[r]   = offdiagA[r];
2914       idx[r] = cmap[offdiagIdx[r]];
2915     }
2916   }
2917   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2918   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2919   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2920   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2921   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2922   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
/*
   MatGetSeqNonzeroStructure_MPIAIJ - Gathers the global nonzero structure of
   the parallel matrix into a sequential matrix via
   MatGetSubMatrix_MPIAIJ_All() with MAT_DO_NOT_GET_VALUES (structure only),
   then frees the one-element Mat array wrapper while keeping the matrix.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2939 
2940 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2941 
#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
/*
   MatInvertBlockDiagonal_MPIAIJ - Returns the inverted block diagonal of the
   matrix by delegating to the local diagonal block a->A only; the
   off-diagonal block a->B contributes nothing to the block diagonal.
*/
PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
{
  Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2953 
2954 
2955 /* -------------------------------------------------------------------*/
2956 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2957        MatGetRow_MPIAIJ,
2958        MatRestoreRow_MPIAIJ,
2959        MatMult_MPIAIJ,
2960 /* 4*/ MatMultAdd_MPIAIJ,
2961        MatMultTranspose_MPIAIJ,
2962        MatMultTransposeAdd_MPIAIJ,
2963 #ifdef PETSC_HAVE_PBGL
2964        MatSolve_MPIAIJ,
2965 #else
2966        0,
2967 #endif
2968        0,
2969        0,
2970 /*10*/ 0,
2971        0,
2972        0,
2973        MatSOR_MPIAIJ,
2974        MatTranspose_MPIAIJ,
2975 /*15*/ MatGetInfo_MPIAIJ,
2976        MatEqual_MPIAIJ,
2977        MatGetDiagonal_MPIAIJ,
2978        MatDiagonalScale_MPIAIJ,
2979        MatNorm_MPIAIJ,
2980 /*20*/ MatAssemblyBegin_MPIAIJ,
2981        MatAssemblyEnd_MPIAIJ,
2982        MatSetOption_MPIAIJ,
2983        MatZeroEntries_MPIAIJ,
2984 /*24*/ MatZeroRows_MPIAIJ,
2985        0,
2986 #ifdef PETSC_HAVE_PBGL
2987        0,
2988 #else
2989        0,
2990 #endif
2991        0,
2992        0,
2993 /*29*/ MatSetUp_MPIAIJ,
2994 #ifdef PETSC_HAVE_PBGL
2995        0,
2996 #else
2997        0,
2998 #endif
2999        0,
3000        0,
3001        0,
3002 /*34*/ MatDuplicate_MPIAIJ,
3003        0,
3004        0,
3005        0,
3006        0,
3007 /*39*/ MatAXPY_MPIAIJ,
3008        MatGetSubMatrices_MPIAIJ,
3009        MatIncreaseOverlap_MPIAIJ,
3010        MatGetValues_MPIAIJ,
3011        MatCopy_MPIAIJ,
3012 /*44*/ MatGetRowMax_MPIAIJ,
3013        MatScale_MPIAIJ,
3014        0,
3015        0,
3016        MatZeroRowsColumns_MPIAIJ,
3017 /*49*/ 0,
3018        0,
3019        0,
3020        0,
3021        0,
3022 /*54*/ MatFDColoringCreate_MPIAIJ,
3023        0,
3024        MatSetUnfactored_MPIAIJ,
3025        MatPermute_MPIAIJ,
3026        0,
3027 /*59*/ MatGetSubMatrix_MPIAIJ,
3028        MatDestroy_MPIAIJ,
3029        MatView_MPIAIJ,
3030        0,
3031        0,
3032 /*64*/ 0,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3038        MatGetRowMinAbs_MPIAIJ,
3039        0,
3040        MatSetColoring_MPIAIJ,
3041 #if defined(PETSC_HAVE_ADIC)
3042        MatSetValuesAdic_MPIAIJ,
3043 #else
3044        0,
3045 #endif
3046        MatSetValuesAdifor_MPIAIJ,
3047 /*75*/ MatFDColoringApply_AIJ,
3048        0,
3049        0,
3050        0,
3051        0,
3052 /*80*/ 0,
3053        0,
3054        0,
3055 /*83*/ MatLoad_MPIAIJ,
3056        0,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3062        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3063        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3064        MatPtAP_Basic,
3065        MatPtAPSymbolic_MPIAIJ,
3066 /*94*/ MatPtAPNumeric_MPIAIJ,
3067        0,
3068        0,
3069        0,
3070        0,
3071 /*99*/ 0,
3072        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3073        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3074        MatConjugate_MPIAIJ,
3075        0,
3076 /*104*/MatSetValuesRow_MPIAIJ,
3077        MatRealPart_MPIAIJ,
3078        MatImaginaryPart_MPIAIJ,
3079        0,
3080        0,
3081 /*109*/0,
3082        MatGetRedundantMatrix_MPIAIJ,
3083        MatGetRowMin_MPIAIJ,
3084        0,
3085        0,
3086 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3087        0,
3088        0,
3089        0,
3090        0,
3091 /*119*/0,
3092        0,
3093        0,
3094        0,
3095        MatGetMultiProcBlock_MPIAIJ,
3096 /*124*/MatFindNonZeroRows_MPIAIJ,
3097        MatGetColumnNorms_MPIAIJ,
3098        MatInvertBlockDiagonal_MPIAIJ,
3099        0,
3100        MatGetSubMatricesParallel_MPIAIJ,
3101 /*129*/0,
3102        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3103        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3104        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3105        0,
3106 /*134*/0,
3107        0,
3108        0,
3109        0,
3110        0
3111 };
3112 
3113 /* ----------------------------------------------------------------------------------------*/
3114 
3115 EXTERN_C_BEGIN
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3118 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3119 {
3120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3121   PetscErrorCode ierr;
3122 
3123   PetscFunctionBegin;
3124   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3125   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 EXTERN_C_END
3129 
3130 EXTERN_C_BEGIN
3131 #undef __FUNCT__
3132 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3133 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3134 {
3135   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3136   PetscErrorCode ierr;
3137 
3138   PetscFunctionBegin;
3139   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3140   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
3143 EXTERN_C_END
3144 
3145 EXTERN_C_BEGIN
3146 #undef __FUNCT__
3147 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3148 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3149 {
3150   Mat_MPIAIJ     *b;
3151   PetscErrorCode ierr;
3152   PetscInt       i;
3153   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3154 
3155   PetscFunctionBegin;
3156   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3157   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3158   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3159   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3160   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3161   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3162 
3163   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3164   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3165   if (d_nnz) {
3166     for (i=0; i<B->rmap->n; i++) {
3167       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3168     }
3169   }
3170   if (o_nnz) {
3171     for (i=0; i<B->rmap->n; i++) {
3172       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3173     }
3174   }
3175   b = (Mat_MPIAIJ*)B->data;
3176 
3177   if (!B->preallocated) {
3178     /* Explicitly create 2 MATSEQAIJ matrices. */
3179     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3180     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3181     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3182     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3183     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3184     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3185     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3186     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3187   }
3188 
3189   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3190   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3191   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3192   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3193   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3194   B->preallocated = PETSC_TRUE;
3195   PetscFunctionReturn(0);
3196 }
3197 EXTERN_C_END
3198 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Creates a new parallel AIJ matrix with the same layout and structure as
   matin; numerical values are copied or not according to cpvalues.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole operations table so any type-specific overrides survive */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* the duplicate starts out assembled and preallocated */
  mat->factortype    = matin->factortype;
  mat->rmap->bs      = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  /* share the row/column layouts with the original (reference counted) */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the global-to-local column map used by the off-diagonal part */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate the list of global column indices of the off-diagonal part */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the communication machinery and the two local SeqAIJ pieces */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3261 
3262 
3263 
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatLoad_MPIAIJ"
3266 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3267 {
3268   PetscScalar    *vals,*svals;
3269   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3270   PetscErrorCode ierr;
3271   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3272   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3273   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3274   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3275   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3276   int            fd;
3277 
3278   PetscFunctionBegin;
3279   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3280   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3281   if (!rank) {
3282     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3283     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3284     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3285   }
3286 
3287   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3288 
3289   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3290   M = header[1]; N = header[2];
3291   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3292   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3293   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3294 
3295   /* If global sizes are set, check if they are consistent with that given in the file */
3296   if (sizesset) {
3297     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3298   }
3299   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3300   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3301 
3302   /* determine ownership of all rows */
3303   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3304   else m = newMat->rmap->n; /* Set by user */
3305 
3306   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3307   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3308 
3309   /* First process needs enough room for process with most rows */
3310   if (!rank) {
3311     mmax       = rowners[1];
3312     for (i=2; i<size; i++) {
3313       mmax = PetscMax(mmax,rowners[i]);
3314     }
3315   } else mmax = m;
3316 
3317   rowners[0] = 0;
3318   for (i=2; i<=size; i++) {
3319     rowners[i] += rowners[i-1];
3320   }
3321   rstart = rowners[rank];
3322   rend   = rowners[rank+1];
3323 
3324   /* distribute row lengths to all processors */
3325   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3326   if (!rank) {
3327     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3328     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3329     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3330     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3331     for (j=0; j<m; j++) {
3332       procsnz[0] += ourlens[j];
3333     }
3334     for (i=1; i<size; i++) {
3335       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3336       /* calculate the number of nonzeros on each processor */
3337       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3338         procsnz[i] += rowlengths[j];
3339       }
3340       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3341     }
3342     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3343   } else {
3344     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3345   }
3346 
3347   if (!rank) {
3348     /* determine max buffer needed and allocate it */
3349     maxnz = 0;
3350     for (i=0; i<size; i++) {
3351       maxnz = PetscMax(maxnz,procsnz[i]);
3352     }
3353     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3354 
3355     /* read in my part of the matrix column indices  */
3356     nz   = procsnz[0];
3357     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3358     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3359 
3360     /* read in every one elses and ship off */
3361     for (i=1; i<size; i++) {
3362       nz     = procsnz[i];
3363       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3364       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3365     }
3366     ierr = PetscFree(cols);CHKERRQ(ierr);
3367   } else {
3368     /* determine buffer space needed for message */
3369     nz = 0;
3370     for (i=0; i<m; i++) {
3371       nz += ourlens[i];
3372     }
3373     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3374 
3375     /* receive message of column indices*/
3376     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3377   }
3378 
3379   /* determine column ownership if matrix is not square */
3380   if (N != M) {
3381     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3382     else n = newMat->cmap->n;
3383     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3384     cstart = cend - n;
3385   } else {
3386     cstart = rstart;
3387     cend   = rend;
3388     n      = cend - cstart;
3389   }
3390 
3391   /* loop over local rows, determining number of off diagonal entries */
3392   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3393   jj = 0;
3394   for (i=0; i<m; i++) {
3395     for (j=0; j<ourlens[i]; j++) {
3396       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3397       jj++;
3398     }
3399   }
3400 
3401   for (i=0; i<m; i++) {
3402     ourlens[i] -= offlens[i];
3403   }
3404   if (!sizesset) {
3405     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3406   }
3407   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3408 
3409   for (i=0; i<m; i++) {
3410     ourlens[i] += offlens[i];
3411   }
3412 
3413   if (!rank) {
3414     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3415 
3416     /* read in my part of the matrix numerical values  */
3417     nz   = procsnz[0];
3418     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3419 
3420     /* insert into matrix */
3421     jj      = rstart;
3422     smycols = mycols;
3423     svals   = vals;
3424     for (i=0; i<m; i++) {
3425       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3426       smycols += ourlens[i];
3427       svals   += ourlens[i];
3428       jj++;
3429     }
3430 
3431     /* read in other processors and ship out */
3432     for (i=1; i<size; i++) {
3433       nz     = procsnz[i];
3434       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3435       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3436     }
3437     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3438   } else {
3439     /* receive numeric values */
3440     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3441 
3442     /* receive message of values*/
3443     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3444 
3445     /* insert into matrix */
3446     jj      = rstart;
3447     smycols = mycols;
3448     svals   = vals;
3449     for (i=0; i<m; i++) {
3450       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3451       smycols += ourlens[i];
3452       svals   += ourlens[i];
3453       jj++;
3454     }
3455   }
3456   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3457   ierr = PetscFree(vals);CHKERRQ(ierr);
3458   ierr = PetscFree(mycols);CHKERRQ(ierr);
3459   ierr = PetscFree(rowners);CHKERRQ(ierr);
3460 
3461   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3462   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 #undef __FUNCT__
3467 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3468 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3469 {
3470   PetscErrorCode ierr;
3471   IS             iscol_local;
3472   PetscInt       csize;
3473 
3474   PetscFunctionBegin;
3475   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3476   if (call == MAT_REUSE_MATRIX) {
3477     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3478     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3479   } else {
3480     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3481   }
3482   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3483   if (call == MAT_INITIAL_MATRIX) {
3484     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3485     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3486   }
3487   PetscFunctionReturn(0);
3488 }
3489 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* first build (or reuse) the local sequential copy of the submatrix, Mreuse */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* walk the CSR arrays of the local submatrix and insert row-by-row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3616 
3617 EXTERN_C_BEGIN
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3620 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3621 {
3622   PetscInt       m,cstart, cend,j,nnz,i,d;
3623   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3624   const PetscInt *JJ;
3625   PetscScalar    *values;
3626   PetscErrorCode ierr;
3627 
3628   PetscFunctionBegin;
3629   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3630 
3631   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3632   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3633   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3634   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3635   m      = B->rmap->n;
3636   cstart = B->cmap->rstart;
3637   cend   = B->cmap->rend;
3638   rstart = B->rmap->rstart;
3639 
3640   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3641 
3642 #if defined(PETSC_USE_DEBUGGING)
3643   for (i=0; i<m; i++) {
3644     nnz     = Ii[i+1]- Ii[i];
3645     JJ      = J + Ii[i];
3646     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3647     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3648     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3649   }
3650 #endif
3651 
3652   for (i=0; i<m; i++) {
3653     nnz     = Ii[i+1]- Ii[i];
3654     JJ      = J + Ii[i];
3655     nnz_max = PetscMax(nnz_max,nnz);
3656     d       = 0;
3657     for (j=0; j<nnz; j++) {
3658       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3659     }
3660     d_nnz[i] = d;
3661     o_nnz[i] = nnz - d;
3662   }
3663   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3664   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3665 
3666   if (v) values = (PetscScalar*)v;
3667   else {
3668     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3669     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3670   }
3671 
3672   for (i=0; i<m; i++) {
3673     ii   = i + rstart;
3674     nnz  = Ii[i+1]- Ii[i];
3675     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3676   }
3677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3679 
3680   if (!v) {
3681     ierr = PetscFree(values);CHKERRQ(ierr);
3682   }
3683   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3684   PetscFunctionReturn(0);
3685 }
3686 EXTERN_C_END
3687 
3688 #undef __FUNCT__
3689 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3690 /*@
3691    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3692    (the default parallel PETSc format).
3693 
3694    Collective on MPI_Comm
3695 
3696    Input Parameters:
3697 +  B - the matrix
3698 .  i - the indices into j for the start of each local row (starts with zero)
3699 .  j - the column indices for each local row (starts with zero)
3700 -  v - optional values in the matrix
3701 
3702    Level: developer
3703 
3704    Notes:
3705        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3706      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3707      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3708 
3709        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3710 
3711        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
3713     as shown:
3714 
3715         1 0 0
3716         2 0 3     P0
3717        -------
3718         4 5 6     P1
3719 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3729 
3730 .keywords: matrix, aij, compressed row, sparse, parallel
3731 
3732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3733           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3734 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     silently a no-op if the matrix type does not provide one */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3743 
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3746 /*@C
3747    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3748    (the default parallel PETSc format).  For good matrix assembly performance
3749    the user should preallocate the matrix storage by setting the parameters
3750    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3751    performance can be increased by more than a factor of 50.
3752 
3753    Collective on MPI_Comm
3754 
3755    Input Parameters:
3756 +  A - the matrix
3757 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3758            (same value is used for all local rows)
3759 .  d_nnz - array containing the number of nonzeros in the various rows of the
3760            DIAGONAL portion of the local submatrix (possibly different for each row)
3761            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3762            The size of this array is equal to the number of local rows, i.e 'm'.
3763            For matrices that will be factored, you must leave room for (and set)
3764            the diagonal entry even if it is zero.
3765 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3766            submatrix (same value is used for all local rows).
3767 -  o_nnz - array containing the number of nonzeros in the various rows of the
3768            OFF-DIAGONAL portion of the local submatrix (possibly different for
3769            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3770            structure. The size of this array is equal to the number
3771            of local rows, i.e 'm'.
3772 
3773    If the *_nnz parameter is given then the *_nz parameter is ignored
3774 
3775    The AIJ format (also called the Yale sparse matrix format or
3776    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3777    storage.  The stored row and column indices begin with zero.
3778    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3779 
3780    The parallel matrix is partitioned such that the first m0 rows belong to
3781    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3782    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3783 
3784    The DIAGONAL portion of the local submatrix of a processor can be defined
3785    as the submatrix which is obtained by extraction the part corresponding to
3786    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3787    first row that belongs to the processor, r2 is the last row belonging to
3788    the this processor, and c1-c2 is range of indices of the local part of a
3789    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3790    common case of a square matrix, the row and column ranges are the same and
3791    the DIAGONAL part is also square. The remaining portion of the local
3792    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3793 
3794    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3795 
3796    You can call MatGetInfo() to get information on how effective the preallocation was;
3797    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3798    You can also run with the option -info and look for messages with the string
3799    malloc in them to see if additional memory allocation was needed.
3800 
3801    Example usage:
3802 
3803    Consider the following 8x8 matrix with 34 non-zero values, that is
3804    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3805    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3806    as follows:
3807 
3808 .vb
3809             1  2  0  |  0  3  0  |  0  4
3810     Proc0   0  5  6  |  7  0  0  |  8  0
3811             9  0 10  | 11  0  0  | 12  0
3812     -------------------------------------
3813            13  0 14  | 15 16 17  |  0  0
3814     Proc1   0 18  0  | 19 20 21  |  0  0
3815             0  0  0  | 22 23  0  | 24  0
3816     -------------------------------------
3817     Proc2  25 26 27  |  0  0 28  | 29  0
3818            30  0  0  | 31 32 33  |  0 34
3819 .ve
3820 
3821    This can be represented as a collection of submatrices as:
3822 
3823 .vb
3824       A B C
3825       D E F
3826       G H I
3827 .ve
3828 
3829    Where the submatrices A,B,C are owned by proc0, D,E,F are
3830    owned by proc1, G,H,I are owned by proc2.
3831 
3832    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3833    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3834    The 'M','N' parameters are 8,8, and have the same values on all procs.
3835 
3836    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3837    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3838    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3839    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3840    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3842 
3843    When d_nz, o_nz parameters are specified, d_nz storage elements are
3844    allocated for every row of the local diagonal submatrix, and o_nz
3845    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3848    In this case, the values of d_nz,o_nz are:
3849 .vb
3850      proc0 : dnz = 2, o_nz = 2
3851      proc1 : dnz = 3, o_nz = 2
3852      proc2 : dnz = 1, o_nz = 4
3853 .ve
3854    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3855    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
3857    34 values.
3858 
3859    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3861    In the above case the values for d_nnz,o_nnz are:
3862 .vb
3863      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3864      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3865      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3866 .ve
3867    Here the space allocated is sum of all the above values i.e 34, and
3868    hence pre-allocation is perfect.
3869 
3870    Level: intermediate
3871 
3872 .keywords: matrix, aij, compressed row, sparse, parallel
3873 
3874 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3875           MPIAIJ, MatGetInfo()
3876 @*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the type-specific implementation registered under
     "MatMPIAIJSetPreallocation_C"; PetscTryMethod() is a no-op if the
     matrix type does not provide that method, so calling this on a
     non-MPIAIJ matrix is harmless (see the MATAIJ manpage). */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3887 
3888 #undef __FUNCT__
3889 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3890 /*@
3891      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3892          CSR format the local rows.
3893 
3894    Collective on MPI_Comm
3895 
3896    Input Parameters:
3897 +  comm - MPI communicator
3898 .  m - number of local rows (Cannot be PETSC_DECIDE)
3899 .  n - This value should be the same as the local size used in creating the
3900        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3901        calculated if N is given) For square matrices n is almost always m.
3902 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3903 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3904 .   i - row indices
3905 .   j - column indices
3906 -   a - matrix values
3907 
3908    Output Parameter:
3909 .   mat - the matrix
3910 
3911    Level: intermediate
3912 
3913    Notes:
3914        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3915      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3916      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3917 
3918        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3919 
3920        The format which is used for the sparse matrix input, is equivalent to a
3921     row-major ordering.. i.e for the following matrix, the input data expected is
3922     as shown:
3923 
3924         1 0 0
3925         2 0 3     P0
3926        -------
3927         4 5 6     P1
3928 
3929      Process0 [P0]: rows_owned=[0,1]
3930         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3933 
3934      Process1 [P1]: rows_owned=[2]
3935         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3938 
3939 .keywords: matrix, aij, compressed row, sparse, parallel
3940 
3941 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3942           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3943 @*/
3944 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3945 {
3946   PetscErrorCode ierr;
3947 
3948   PetscFunctionBegin;
3949   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3950   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3951   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3952   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3953   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3954   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3955   PetscFunctionReturn(0);
3956 }
3957 
3958 #undef __FUNCT__
3959 #define __FUNCT__ "MatCreateAIJ"
3960 /*@C
3961    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3962    (the default parallel PETSc format).  For good matrix assembly performance
3963    the user should preallocate the matrix storage by setting the parameters
3964    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3965    performance can be increased by more than a factor of 50.
3966 
3967    Collective on MPI_Comm
3968 
3969    Input Parameters:
3970 +  comm - MPI communicator
3971 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3972            This value should be the same as the local size used in creating the
3973            y vector for the matrix-vector product y = Ax.
3974 .  n - This value should be the same as the local size used in creating the
3975        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3976        calculated if N is given) For square matrices n is almost always m.
3977 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3978 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3979 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3980            (same value is used for all local rows)
3981 .  d_nnz - array containing the number of nonzeros in the various rows of the
3982            DIAGONAL portion of the local submatrix (possibly different for each row)
3983            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3984            The size of this array is equal to the number of local rows, i.e 'm'.
3985 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986            submatrix (same value is used for all local rows).
3987 -  o_nnz - array containing the number of nonzeros in the various rows of the
3988            OFF-DIAGONAL portion of the local submatrix (possibly different for
3989            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3990            structure. The size of this array is equal to the number
3991            of local rows, i.e 'm'.
3992 
3993    Output Parameter:
3994 .  A - the matrix
3995 
3996    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3998    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3999 
4000    Notes:
4001    If the *_nnz parameter is given then the *_nz parameter is ignored
4002 
4003    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4004    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4005    storage requirements for this matrix.
4006 
4007    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4008    processor than it must be used on all processors that share the object for
4009    that argument.
4010 
4011    The user MUST specify either the local or global matrix dimensions
4012    (possibly both).
4013 
4014    The parallel matrix is partitioned across processors such that the
4015    first m0 rows belong to process 0, the next m1 rows belong to
4016    process 1, the next m2 rows belong to process 2 etc.. where
4017    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4018    values corresponding to [m x N] submatrix.
4019 
4020    The columns are logically partitioned with the n0 columns belonging
4021    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4023 
4024    The DIAGONAL portion of the local submatrix on any given processor
4025    is the submatrix corresponding to the rows and columns m,n
4026    corresponding to the given processor. i.e diagonal matrix on
4027    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4028    etc. The remaining portion of the local submatrix [m x (N-n)]
4029    constitute the OFF-DIAGONAL portion. The example below better
4030    illustrates this concept.
4031 
4032    For a square global matrix we define each processor's diagonal portion
4033    to be its local rows and the corresponding columns (a square submatrix);
4034    each processor's off-diagonal portion encompasses the remainder of the
4035    local matrix (a rectangular submatrix).
4036 
4037    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4038 
4039    When calling this routine with a single process communicator, a matrix of
4040    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4041    type of communicator, use the construction mechanism:
4042      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4043 
4044    By default, this format uses inodes (identical nodes) when possible.
4045    We search for consecutive rows with the same nonzero structure, thereby
4046    reusing matrix information to achieve increased efficiency.
4047 
4048    Options Database Keys:
4049 +  -mat_no_inode  - Do not use inodes
4050 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4051 -  -mat_aij_oneindex - Internally use indexing starting at 1
4052         rather than 0.  Note that when calling MatSetValues(),
4053         the user still MUST index entries starting at 0!
4054 
4055 
4056    Example usage:
4057 
4058    Consider the following 8x8 matrix with 34 non-zero values, that is
4059    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4060    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4061    as follows:
4062 
4063 .vb
4064             1  2  0  |  0  3  0  |  0  4
4065     Proc0   0  5  6  |  7  0  0  |  8  0
4066             9  0 10  | 11  0  0  | 12  0
4067     -------------------------------------
4068            13  0 14  | 15 16 17  |  0  0
4069     Proc1   0 18  0  | 19 20 21  |  0  0
4070             0  0  0  | 22 23  0  | 24  0
4071     -------------------------------------
4072     Proc2  25 26 27  |  0  0 28  | 29  0
4073            30  0  0  | 31 32 33  |  0 34
4074 .ve
4075 
4076    This can be represented as a collection of submatrices as:
4077 
4078 .vb
4079       A B C
4080       D E F
4081       G H I
4082 .ve
4083 
4084    Where the submatrices A,B,C are owned by proc0, D,E,F are
4085    owned by proc1, G,H,I are owned by proc2.
4086 
4087    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089    The 'M','N' parameters are 8,8, and have the same values on all procs.
4090 
4091    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4092    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4093    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4095    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4097 
4098    When d_nz, o_nz parameters are specified, d_nz storage elements are
4099    allocated for every row of the local diagonal submatrix, and o_nz
4100    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4103    In this case, the values of d_nz,o_nz are:
4104 .vb
4105      proc0 : dnz = 2, o_nz = 2
4106      proc1 : dnz = 3, o_nz = 2
4107      proc2 : dnz = 1, o_nz = 4
4108 .ve
4109    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4110    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4112    34 values.
4113 
4114    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4116    In the above case the values for d_nnz,o_nnz are:
4117 .vb
4118      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4119      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4120      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4121 .ve
4122    Here the space allocated is sum of all the above values i.e 34, and
4123    hence pre-allocation is perfect.
4124 
4125    Level: intermediate
4126 
4127 .keywords: matrix, aij, compressed row, sparse, parallel
4128 
4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4130           MPIAIJ, MatCreateMPIAIJWithArrays()
4131 @*/
4132 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4133 {
4134   PetscErrorCode ierr;
4135   PetscMPIInt    size;
4136 
4137   PetscFunctionBegin;
4138   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4139   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4141   if (size > 1) {
4142     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4143     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4144   } else {
4145     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4146     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4147   }
4148   PetscFunctionReturn(0);
4149 }
4150 
4151 #undef __FUNCT__
4152 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4153 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4154 {
4155   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4156 
4157   PetscFunctionBegin;
4158   *Ad     = a->A;
4159   *Ao     = a->B;
4160   *colmap = a->garray;
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Propagate a coloring of the parallel matrix A onto its two sequential
   pieces (diagonal block a->A and off-diagonal block a->B).  For a GLOBAL
   coloring the colors of all columns are gathered so off-diagonal (ghost)
   columns can be colored; for a GHOSTED coloring the global column indices
   are translated to local ones via the matrix's local-to-global mapping. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* gather every process's colors so any global column can be looked up */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    /* a->garray[i] is the global column of local off-diagonal column i */
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ocoloring takes ownership of colors[]; do not free it here */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    /* map the owned global columns [rstart, rstart+n) to local indices */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* translate the ghost (off-diagonal) global columns in garray to local indices */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}
4225 
4226 #if defined(PETSC_HAVE_ADIC)
4227 #undef __FUNCT__
4228 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4229 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4230 {
4231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4232   PetscErrorCode ierr;
4233 
4234   PetscFunctionBegin;
4235   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4236   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4237   PetscFunctionReturn(0);
4238 }
4239 #endif
4240 
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4243 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4244 {
4245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4246   PetscErrorCode ierr;
4247 
4248   PetscFunctionBegin;
4249   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4250   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
/* Symbolic phase of concatenating per-process SeqAIJ matrices into one MPIAIJ
   matrix: determines the row distribution and preallocates the nonzero
   structure of *outmat from the rows of each process's inmat.
   NOTE: MatPreallocateInitialize/Finalize are macros that declare, fill and
   free the dnz/onz arrays; the statement order here must be preserved. */
PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE){
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  /* prefix-scan of local row counts gives this process's first global row */
  ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  /* count diagonal/off-diagonal nonzeros of every local row for preallocation */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4289 
4290 #undef __FUNCT__
4291 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4292 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4293 {
4294   PetscErrorCode ierr;
4295   PetscInt       m,N,i,rstart,nnz,Ii;
4296   PetscInt       *indx;
4297   PetscScalar    *values;
4298 
4299   PetscFunctionBegin;
4300   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4301   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4302   for (i=0;i<m;i++) {
4303     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4304     Ii    = i + rstart;
4305     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4306     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4307   }
4308   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4309   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 #undef __FUNCT__
4314 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4315 /*@
4316       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4317                  matrices from each processor
4318 
4319     Collective on MPI_Comm
4320 
4321    Input Parameters:
4322 +    comm - the communicators the parallel matrix will live on
4323 .    inmat - the input sequential matrices
4324 .    n - number of local columns (or PETSC_DECIDE)
4325 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4326 
4327    Output Parameter:
4328 .    outmat - the parallel matrix generated
4329 
4330     Level: advanced
4331 
4332    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4333 
4334 @*/
PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  /* first call: build the parallel nonzero structure of *outmat;
     with MAT_REUSE_MATRIX the structure from a previous call is reused */
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  /* always (re)fill the numerical values from inmat */
  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4348 
4349 #undef __FUNCT__
4350 #define __FUNCT__ "MatFileSplit"
4351 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4352 {
4353   PetscErrorCode    ierr;
4354   PetscMPIInt       rank;
4355   PetscInt          m,N,i,rstart,nnz;
4356   size_t            len;
4357   const PetscInt    *indx;
4358   PetscViewer       out;
4359   char              *name;
4360   Mat               B;
4361   const PetscScalar *values;
4362 
4363   PetscFunctionBegin;
4364   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4365   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4366   /* Should this be the type of the diagonal block of A? */
4367   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4368   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4369   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4370   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4371   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4372   for (i=0;i<m;i++) {
4373     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4374     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4375     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4376   }
4377   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379 
4380   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4381   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4382   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4383   sprintf(name,"%s.%d",outfile,rank);
4384   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4385   ierr = PetscFree(name);
4386   ierr = MatView(B,out);CHKERRQ(ierr);
4387   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4388   ierr = MatDestroy(&B);CHKERRQ(ierr);
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4393 #undef __FUNCT__
4394 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/* Destructor for MPIAIJ matrices built by the seqs-to-MPI merge routines:
   frees the Mat_Merge_SeqsToMPI workspace attached to the matrix (if any),
   detaches it, then falls through to the normal MPIAIJ destructor. */
PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    /* release every array owned by the merge context; buf_ri/buf_rj are
       arrays of pointers whose storage is one contiguous chunk anchored
       at element 0, hence the [0] frees */
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the (now stale) container from the matrix */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  /* chain to the standard MPIAIJ destructor */
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4424 
4425 #include <../src/mat/utils/freespace.h>
4426 #include <petscbt.h>
4427 
4428 #undef __FUNCT__
4429 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4430 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4431 {
4432   PetscErrorCode       ierr;
4433   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4434   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4435   PetscMPIInt          size,rank,taga,*len_s;
4436   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4437   PetscInt             proc,m;
4438   PetscInt             **buf_ri,**buf_rj;
4439   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4440   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4441   MPI_Request          *s_waits,*r_waits;
4442   MPI_Status           *status;
4443   MatScalar            *aa=a->a;
4444   MatScalar            **abuf_r,*ba_i;
4445   Mat_Merge_SeqsToMPI  *merge;
4446   PetscContainer       container;
4447 
4448   PetscFunctionBegin;
4449   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4450 
4451   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4452   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4453 
4454   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4455   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4456 
4457   bi     = merge->bi;
4458   bj     = merge->bj;
4459   buf_ri = merge->buf_ri;
4460   buf_rj = merge->buf_rj;
4461 
4462   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4463   owners = merge->rowmap->range;
4464   len_s  = merge->len_s;
4465 
4466   /* send and recv matrix values */
4467   /*-----------------------------*/
4468   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4469   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4470 
4471   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4472   for (proc=0,k=0; proc<size; proc++){
4473     if (!len_s[proc]) continue;
4474     i = owners[proc];
4475     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4476     k++;
4477   }
4478 
4479   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4480   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4481   ierr = PetscFree(status);CHKERRQ(ierr);
4482 
4483   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4484   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4485 
4486   /* insert mat values of mpimat */
4487   /*----------------------------*/
4488   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4489   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4490 
4491   for (k=0; k<merge->nrecv; k++){
4492     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4493     nrows = *(buf_ri_k[k]);
4494     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4495     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4496   }
4497 
4498   /* set values of ba */
4499   m = merge->rowmap->n;
4500   for (i=0; i<m; i++) {
4501     arow = owners[rank] + i;
4502     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4503     bnzi = bi[i+1] - bi[i];
4504     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4505 
4506     /* add local non-zero vals of this proc's seqmat into ba */
4507     anzi = ai[arow+1] - ai[arow];
4508     aj   = a->j + ai[arow];
4509     aa   = a->a + ai[arow];
4510     nextaj = 0;
4511     for (j=0; nextaj<anzi; j++){
4512       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4513         ba_i[j] += aa[nextaj++];
4514       }
4515     }
4516 
4517     /* add received vals into ba */
4518     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4519       /* i-th row */
4520       if (i == *nextrow[k]) {
4521         anzi = *(nextai[k]+1) - *nextai[k];
4522         aj   = buf_rj[k] + *(nextai[k]);
4523         aa   = abuf_r[k] + *(nextai[k]);
4524         nextaj = 0;
4525         for (j=0; nextaj<anzi; j++){
4526           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4527             ba_i[j] += aa[nextaj++];
4528           }
4529         }
4530         nextrow[k]++; nextai[k]++;
4531       }
4532     }
4533     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4534   }
4535   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4536   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4537 
4538   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4539   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4540   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4541   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4542   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4543   PetscFunctionReturn(0);
4544 }
4545 
4546 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4547 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of MatCreateMPIAIJSumSeqAIJ().

   Each process contributes one sequential AIJ matrix (all with the same global
   dimensions); this routine determines the nonzero structure of their parallel sum:
     1. partitions the rows across processes (merge->rowmap);
     2. ships the i/j structure of the non-owned rows to their owning processes
        (nonblocking MPI sends/recvs of the compressed row storage);
     3. on each process, merges the locally owned rows with the received rows
        through a sorted linked list, producing the union row structure bi/bj;
     4. creates an empty, preallocated MPIAIJ matrix B_mpi and stashes the merge
        data (bi, bj, buf_ri, buf_rj, send lengths, ...) in a PetscContainer
        composed on B_mpi under the key "MatMergeSeqsToMPI", so that
        MatCreateMPIAIJSumSeqAIJNumeric() can later fill in the numerical values.

   Note: the returned *mpimat is NOT assembled (B_mpi->assembled = PETSC_FALSE);
   only the numeric routine completes it.
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm (duplicate is reference-counted; released below) */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  /* m/M may have been PETSC_DECIDE/PETSC_DETERMINE; use the resolved values */
  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s  = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j entries) in the rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; empty rows are not encoded in the i-structure msg */
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* column indices of all rows owned by [proc] are contiguous in aj */
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list (sorted merge of column indices per row) */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow   = owners[rank] + i;
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* copy the accumulated column indices into the final contiguous bj array */
  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    /* local column count undecided: pin the global column count to N instead */
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->coi           = PETSC_NULL;
  merge->coj           = PETSC_NULL;
  merge->owners_co     = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4799 
4800 #undef __FUNCT__
4801 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4802 /*@C
4803       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4804                  matrices from each processor
4805 
4806     Collective on MPI_Comm
4807 
4808    Input Parameters:
4809 +    comm - the communicators the parallel matrix will live on
4810 .    seqmat - the input sequential matrices
4811 .    m - number of local rows (or PETSC_DECIDE)
4812 .    n - number of local columns (or PETSC_DECIDE)
4813 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4814 
4815    Output Parameter:
4816 .    mpimat - the parallel matrix generated
4817 
4818     Level: advanced
4819 
4820    Notes:
4821      The dimensions of the sequential matrix in each processor MUST be the same.
4822      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4823      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4824 @*/
4825 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4826 {
4827   PetscErrorCode   ierr;
4828   PetscMPIInt     size;
4829 
4830   PetscFunctionBegin;
4831   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4832   if (size == 1){
4833      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4834      if (scall == MAT_INITIAL_MATRIX){
4835        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4836      } else {
4837        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4838      }
4839      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4840      PetscFunctionReturn(0);
4841   }
4842   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4843   if (scall == MAT_INITIAL_MATRIX){
4844     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4845   }
4846   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4847   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4848   PetscFunctionReturn(0);
4849 }
4850 
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  /* a = diagonal block, b = off-diagonal block of the MPIAIJ matrix */
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool       match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result has all entries of row i from both the diagonal and
       off-diagonal blocks */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* merge each row in ascending global column order:
       off-diagonal columns < cstart, then diagonal columns, then the rest */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (global columns below the diagonal block) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];  /* cmap maps the compressed off-diag column to its global index */
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining columns, all >= cstart + local width) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure was built above on a previous call; only refresh the values,
       walking the blocks in the exact same order as the INITIAL case */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4949 
4950 #undef __FUNCT__
4951 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4952 /*@C
4953      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4954 
4955     Not Collective
4956 
4957    Input Parameters:
4958 +    A - the matrix
4959 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4960 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4961 
4962    Output Parameter:
4963 .    A_loc - the local sequential matrix generated
4964 
4965     Level: developer
4966 
4967 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4968 
4969 @*/
4970 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4971 {
4972   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4973   PetscErrorCode    ierr;
4974   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4975   IS                isrowa,iscola;
4976   Mat               *aloc;
4977   PetscBool       match;
4978 
4979   PetscFunctionBegin;
4980   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4981   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4982   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4983   if (!row){
4984     start = A->rmap->rstart; end = A->rmap->rend;
4985     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4986   } else {
4987     isrowa = *row;
4988   }
4989   if (!col){
4990     start = A->cmap->rstart;
4991     cmap  = a->garray;
4992     nzA   = a->A->cmap->n;
4993     nzB   = a->B->cmap->n;
4994     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4995     ncols = 0;
4996     for (i=0; i<nzB; i++) {
4997       if (cmap[i] < start) idx[ncols++] = cmap[i];
4998       else break;
4999     }
5000     imark = i;
5001     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5002     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5003     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5004   } else {
5005     iscola = *col;
5006   }
5007   if (scall != MAT_INITIAL_MATRIX){
5008     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5009     aloc[0] = *A_loc;
5010   }
5011   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5012   *A_loc = aloc[0];
5013   ierr = PetscFree(aloc);CHKERRQ(ierr);
5014   if (!row){
5015     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5016   }
5017   if (!col){
5018     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5019   }
5020   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5021   PetscFunctionReturn(0);
5022 }
5023 
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS                isrowb,iscolb;
  Mat               *bseq=PETSC_NULL;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for A*B-style products */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* build the sorted list of B's rows to fetch: off-diagonal columns of A
       below the diagonal block, then the local columns, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;        /* global indices of A's off-diagonal columns (sorted) */
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    /* take all columns of B */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() with MAT_REUSE_MATRIX expects a pre-allocated array
       containing the previously generated matrix */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller (for later reuse) or destroy them */
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5095 
5096 #undef __FUNCT__
5097 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5098 /*
5099     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5100     of the OFF-DIAGONAL portion of local A
5101 
5102     Collective on Mat
5103 
5104    Input Parameters:
5105 +    A,B - the matrices in mpiaij format
5106 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5107 
5108    Output Parameter:
5109 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5110 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5111 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5112 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5113 
5114     Level: developer
5115 
5116 */
5117 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5118 {
5119   VecScatter_MPI_General *gen_to,*gen_from;
5120   PetscErrorCode         ierr;
5121   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5122   Mat_SeqAIJ             *b_oth;
5123   VecScatter             ctx=a->Mvctx;
5124   MPI_Comm               comm=((PetscObject)ctx)->comm;
5125   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5126   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5127   PetscScalar            *rvalues,*svalues;
5128   MatScalar              *b_otha,*bufa,*bufA;
5129   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5130   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5131   MPI_Status             *sstatus,rstatus;
5132   PetscMPIInt            jj;
5133   PetscInt               *cols,sbs,rbs;
5134   PetscScalar            *vals;
5135 
5136   PetscFunctionBegin;
5137   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5138     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5139   }
5140   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5141   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5142 
5143   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5144   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5145   rvalues  = gen_from->values; /* holds the length of receiving row */
5146   svalues  = gen_to->values;   /* holds the length of sending row */
5147   nrecvs   = gen_from->n;
5148   nsends   = gen_to->n;
5149 
5150   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5151   srow     = gen_to->indices;   /* local row index to be sent */
5152   sstarts  = gen_to->starts;
5153   sprocs   = gen_to->procs;
5154   sstatus  = gen_to->sstatus;
5155   sbs      = gen_to->bs;
5156   rstarts  = gen_from->starts;
5157   rprocs   = gen_from->procs;
5158   rbs      = gen_from->bs;
5159 
5160   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5161   if (scall == MAT_INITIAL_MATRIX){
5162     /* i-array */
5163     /*---------*/
5164     /*  post receives */
5165     for (i=0; i<nrecvs; i++){
5166       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5167       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5168       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5169     }
5170 
5171     /* pack the outgoing message */
5172     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5173     sstartsj[0] = 0;  rstartsj[0] = 0;
5174     len = 0; /* total length of j or a array to be sent */
5175     k = 0;
5176     for (i=0; i<nsends; i++){
5177       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5178       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5179       for (j=0; j<nrows; j++) {
5180         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5181         for (l=0; l<sbs; l++){
5182           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5183           rowlen[j*sbs+l] = ncols;
5184           len += ncols;
5185           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5186         }
5187         k++;
5188       }
5189       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5190       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5191     }
5192     /* recvs and sends of i-array are completed */
5193     i = nrecvs;
5194     while (i--) {
5195       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5196     }
5197     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5198 
5199     /* allocate buffers for sending j and a arrays */
5200     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5201     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5202 
5203     /* create i-array of B_oth */
5204     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5205     b_othi[0] = 0;
5206     len = 0; /* total length of j or a array to be received */
5207     k = 0;
5208     for (i=0; i<nrecvs; i++){
5209       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5210       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5211       for (j=0; j<nrows; j++) {
5212         b_othi[k+1] = b_othi[k] + rowlen[j];
5213         len += rowlen[j]; k++;
5214       }
5215       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5216     }
5217 
5218     /* allocate space for j and a arrrays of B_oth */
5219     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5220     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5221 
5222     /* j-array */
5223     /*---------*/
5224     /*  post receives of j-array */
5225     for (i=0; i<nrecvs; i++){
5226       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5227       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5228     }
5229 
5230     /* pack the outgoing message j-array */
5231     k = 0;
5232     for (i=0; i<nsends; i++){
5233       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5234       bufJ = bufj+sstartsj[i];
5235       for (j=0; j<nrows; j++) {
5236         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5237         for (ll=0; ll<sbs; ll++){
5238           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5239           for (l=0; l<ncols; l++){
5240             *bufJ++ = cols[l];
5241           }
5242           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5243         }
5244       }
5245       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5246     }
5247 
5248     /* recvs and sends of j-array are completed */
5249     i = nrecvs;
5250     while (i--) {
5251       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5252     }
5253     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5254   } else if (scall == MAT_REUSE_MATRIX){
5255     sstartsj = *startsj_s;
5256     rstartsj = *startsj_r;
5257     bufa     = *bufa_ptr;
5258     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5259     b_otha   = b_oth->a;
5260   } else {
5261     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5262   }
5263 
5264   /* a-array */
5265   /*---------*/
5266   /*  post receives of a-array */
5267   for (i=0; i<nrecvs; i++){
5268     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5269     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5270   }
5271 
5272   /* pack the outgoing message a-array */
5273   k = 0;
5274   for (i=0; i<nsends; i++){
5275     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5276     bufA = bufa+sstartsj[i];
5277     for (j=0; j<nrows; j++) {
5278       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5279       for (ll=0; ll<sbs; ll++){
5280         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5281         for (l=0; l<ncols; l++){
5282           *bufA++ = vals[l];
5283         }
5284         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5285       }
5286     }
5287     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5288   }
5289   /* recvs and sends of a-array are completed */
5290   i = nrecvs;
5291   while (i--) {
5292     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5293   }
5294   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5295   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5296 
5297   if (scall == MAT_INITIAL_MATRIX){
5298     /* put together the new matrix */
5299     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5300 
5301     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5302     /* Since these are PETSc arrays, change flags to free them as necessary. */
5303     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5304     b_oth->free_a  = PETSC_TRUE;
5305     b_oth->free_ij = PETSC_TRUE;
5306     b_oth->nonew   = 0;
5307 
5308     ierr = PetscFree(bufj);CHKERRQ(ierr);
5309     if (!startsj_s || !bufa_ptr){
5310       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5311       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5312     } else {
5313       *startsj_s = sstartsj;
5314       *startsj_r = rstartsj;
5315       *bufa_ptr  = bufa;
5316     }
5317   }
5318   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5319   PetscFunctionReturn(0);
5320 }
5321 
5322 #undef __FUNCT__
5323 #define __FUNCT__ "MatGetCommunicationStructs"
5324 /*@C
5325   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5326 
5327   Not Collective
5328 
5329   Input Parameters:
5330 . A - The matrix in mpiaij format
5331 
  Output Parameters:
5333 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5334 . colmap - A map from global column index to local index into lvec
5335 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5336 
5337   Level: developer
5338 
5339 @*/
5340 #if defined (PETSC_USE_CTABLE)
5341 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5342 #else
5343 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5344 #endif
5345 {
5346   Mat_MPIAIJ *a;
5347 
5348   PetscFunctionBegin;
5349   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5350   PetscValidPointer(lvec, 2);
5351   PetscValidPointer(colmap, 3);
5352   PetscValidPointer(multScatter, 4);
5353   a = (Mat_MPIAIJ *) A->data;
5354   if (lvec) *lvec = a->lvec;
5355   if (colmap) *colmap = a->colmap;
5356   if (multScatter) *multScatter = a->Mvctx;
5357   PetscFunctionReturn(0);
5358 }
5359 
5360 EXTERN_C_BEGIN
5361 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5362 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5363 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5364 EXTERN_C_END
5365 
5366 #undef __FUNCT__
5367 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5368 /*
5369     Computes (B'*A')' since computing B*A directly is untenable
5370 
5371                n                       p                          p
5372         (              )       (              )         (                  )
5373       m (      A       )  *  n (       B      )   =   m (         C        )
5374         (              )       (              )         (                  )
5375 
5376 */
5377 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5378 {
5379   PetscErrorCode     ierr;
5380   Mat                At,Bt,Ct;
5381 
5382   PetscFunctionBegin;
5383   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5384   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5385   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5386   ierr = MatDestroy(&At);CHKERRQ(ierr);
5387   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5388   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5389   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5390   PetscFunctionReturn(0);
5391 }
5392 
5393 #undef __FUNCT__
5394 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5395 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5396 {
5397   PetscErrorCode ierr;
5398   PetscInt       m=A->rmap->n,n=B->cmap->n;
5399   Mat            Cmat;
5400 
5401   PetscFunctionBegin;
5402   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5403   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5404   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5405   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5406   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5407   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5408   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5409   *C   = Cmat;
5410   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5411   PetscFunctionReturn(0);
5412 }
5413 
5414 /* ----------------------------------------------------------------*/
5415 #undef __FUNCT__
5416 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5417 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5418 {
5419   PetscErrorCode ierr;
5420 
5421   PetscFunctionBegin;
5422   if (scall == MAT_INITIAL_MATRIX){
5423     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5424   }
5425   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5426   PetscFunctionReturn(0);
5427 }
5428 
5429 EXTERN_C_BEGIN
5430 #if defined(PETSC_HAVE_MUMPS)
5431 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5432 #endif
5433 #if defined(PETSC_HAVE_PASTIX)
5434 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5435 #endif
5436 #if defined(PETSC_HAVE_SUPERLU_DIST)
5437 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5438 #endif
5439 #if defined(PETSC_HAVE_SPOOLES)
5440 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5441 #endif
5442 EXTERN_C_END
5443 
5444 /*MC
5445    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5446 
5447    Options Database Keys:
5448 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5449 
5450   Level: beginner
5451 
5452 .seealso: MatCreateAIJ()
5453 M*/
5454 
5455 EXTERN_C_BEGIN
5456 #undef __FUNCT__
5457 #define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor installed for the MATMPIAIJ type: allocates the
   Mat_MPIAIJ context, installs the operations table, and composes the
   name-dispatched hooks (preallocation, conversions, external factorizations).
   Called by MatSetType()/MatCreate(); B has sizes set but no storage yet.
*/
PetscErrorCode  MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  /* allocate the MPIAIJ context and copy in the full operations table */
  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled    = PETSC_FALSE;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;   /* global-to-local column map, built lazily on first need */
  b->garray      = 0;   /* global column indices of the off-diagonal block, built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external direct-solver factorizations enabled at configure time */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* name-dispatched hooks: the string key is the public lookup name, the
     second string and function pointer are the implementation */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to the related parallel formats */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                     "MatConvert_MPIAIJ_MPIAIJPERM",
                                      MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                     "MatConvert_MPIAIJ_MPIAIJCRL",
                                      MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                      MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  /* dense-times-sparse product kernels (see implementations above) */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5554 EXTERN_C_END
5555 
5556 #undef __FUNCT__
5557 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5558 /*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5560          and "off-diagonal" part of the matrix in CSR format.
5561 
5562    Collective on MPI_Comm
5563 
5564    Input Parameters:
5565 +  comm - MPI communicator
5566 .  m - number of local rows (Cannot be PETSC_DECIDE)
5567 .  n - This value should be the same as the local size used in creating the
5568        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5569        calculated if N is given) For square matrices n is almost always m.
5570 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5571 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5572 .   i - row indices for "diagonal" portion of matrix
5573 .   j - column indices
5574 .   a - matrix values
5575 .   oi - row indices for "off-diagonal" portion of matrix
5576 .   oj - column indices
5577 -   oa - matrix values
5578 
5579    Output Parameter:
5580 .   mat - the matrix
5581 
5582    Level: advanced
5583 
5584    Notes:
5585        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5586        must free the arrays once the matrix has been destroyed and not before.
5587 
5588        The i and j indices are 0 based
5589 
5590        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5591 
5592        This sets local rows and cannot be used to set off-processor values.
5593 
5594        You cannot later use MatSetValues() to change values in this matrix.
5595 
5596 .keywords: matrix, aij, compressed row, sparse, parallel
5597 
5598 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5599           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5600 @*/
5601 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5602 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5603 {
5604   PetscErrorCode ierr;
5605   Mat_MPIAIJ     *maij;
5606 
5607  PetscFunctionBegin;
5608   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5609   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5610   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5611   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5612   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5613   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5614   maij = (Mat_MPIAIJ*) (*mat)->data;
5615   maij->donotstash     = PETSC_TRUE;
5616   (*mat)->preallocated = PETSC_TRUE;
5617 
5618   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5619   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5620   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5621   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5622 
5623   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5624   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5625 
5626   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630 
5631   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633   PetscFunctionReturn(0);
5634 }
5635 
5636 /*
5637     Special version for direct calls from Fortran
5638 */
5639 #include <petsc-private/fortranimpl.h>
5640 
5641 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5642 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5643 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5644 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5645 #endif
5646 
5647 /* Change these macros so can be used in void function */
5648 #undef CHKERRQ
5649 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5650 #undef SETERRQ2
5651 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5652 #undef SETERRQ3
5653 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5654 #undef SETERRQ
5655 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5656 
5657 EXTERN_C_BEGIN
5658 #undef __FUNCT__
5659 #define __FUNCT__ "matsetvaluesmpiaij_"
5660 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5661 {
5662   Mat             mat = *mmat;
5663   PetscInt        m = *mm, n = *mn;
5664   InsertMode      addv = *maddv;
5665   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5666   PetscScalar     value;
5667   PetscErrorCode  ierr;
5668 
5669   MatCheckPreallocated(mat,1);
5670   if (mat->insertmode == NOT_SET_VALUES) {
5671     mat->insertmode = addv;
5672   }
5673 #if defined(PETSC_USE_DEBUG)
5674   else if (mat->insertmode != addv) {
5675     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5676   }
5677 #endif
5678   {
5679   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5680   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5681   PetscBool       roworiented = aij->roworiented;
5682 
5683   /* Some Variables required in the macro */
5684   Mat             A = aij->A;
5685   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5686   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5687   MatScalar       *aa = a->a;
5688   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5689   Mat             B = aij->B;
5690   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5691   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5692   MatScalar       *ba = b->a;
5693 
5694   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5695   PetscInt        nonew = a->nonew;
5696   MatScalar       *ap1,*ap2;
5697 
5698   PetscFunctionBegin;
5699   for (i=0; i<m; i++) {
5700     if (im[i] < 0) continue;
5701 #if defined(PETSC_USE_DEBUG)
5702     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5703 #endif
5704     if (im[i] >= rstart && im[i] < rend) {
5705       row      = im[i] - rstart;
5706       lastcol1 = -1;
5707       rp1      = aj + ai[row];
5708       ap1      = aa + ai[row];
5709       rmax1    = aimax[row];
5710       nrow1    = ailen[row];
5711       low1     = 0;
5712       high1    = nrow1;
5713       lastcol2 = -1;
5714       rp2      = bj + bi[row];
5715       ap2      = ba + bi[row];
5716       rmax2    = bimax[row];
5717       nrow2    = bilen[row];
5718       low2     = 0;
5719       high2    = nrow2;
5720 
5721       for (j=0; j<n; j++) {
5722         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5723         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5724         if (in[j] >= cstart && in[j] < cend){
5725           col = in[j] - cstart;
5726           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5727         } else if (in[j] < 0) continue;
5728 #if defined(PETSC_USE_DEBUG)
5729         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5730 #endif
5731         else {
5732           if (mat->was_assembled) {
5733             if (!aij->colmap) {
5734               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5735             }
5736 #if defined (PETSC_USE_CTABLE)
5737             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5738 	    col--;
5739 #else
5740             col = aij->colmap[in[j]] - 1;
5741 #endif
5742             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5743               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5744               col =  in[j];
5745               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5746               B = aij->B;
5747               b = (Mat_SeqAIJ*)B->data;
5748               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5749               rp2      = bj + bi[row];
5750               ap2      = ba + bi[row];
5751               rmax2    = bimax[row];
5752               nrow2    = bilen[row];
5753               low2     = 0;
5754               high2    = nrow2;
5755               bm       = aij->B->rmap->n;
5756               ba = b->a;
5757             }
5758           } else col = in[j];
5759           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5760         }
5761       }
5762     } else {
5763       if (!aij->donotstash) {
5764         if (roworiented) {
5765           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5766         } else {
5767           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5768         }
5769       }
5770     }
5771   }}
5772   PetscFunctionReturnVoid();
5773 }
5774 EXTERN_C_END
5775 
5776