xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9cc40f1f181ffe7740bc77b26c07c2a6b11877c3)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
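
/*
   A minimal usage sketch of the pattern recommended above (comm, M, N, nz, d_nz, o_nz and
   ierr are assumed to be supplied by the caller; the preallocation call that does not match
   the communicator size is ignored):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/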
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
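
/*
   A minimal sketch of selecting this format from the options database, e.g. by running the
   program with -mat_type aijcrl (A, comm, M and N are assumed to come from the caller; the
   preallocation calls shown in the MATAIJ example above apply unchanged):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/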
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
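
/*
   A call sketch for the routine above (mlocal, the number of rows this process is to own, and
   gmat, the full SeqAIJ matrix which is only examined on process 0, are assumed to be set up
   by the caller; the local sizes must add up to the global size of the square matrix):

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call only moves new numerical values from process 0 into the already
   assembled parallel matrix.
*/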
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 holds an order N integer array) but access is fast.
331 */
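
/*
   For illustration (hypothetical values): if garray[] = {3,17,42}, the colmap built below
   answers "global column 17 is local off-diagonal column 1".  The entries are stored shifted
   by one so that a lookup result of 0 can mean "column not present", which is why callers do

      #if defined (PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr); col--;
      #else
        col = aij->colmap[gcol] - 1;
      #endif

   where gcol stands for the global column number being looked up.
*/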
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry, which
838      would allocate new memory, triggering many mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of off process part of matrix zeroing removed columns*/
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added into yy until the next line */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values actually were received in the Begin() but we need to call this nop */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190    PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
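/*
   Usage sketch (illustrative, not part of this source file): applications do not call
   MatView_MPIAIJ_Binary() directly; it is reached through the generic MatView() when the
   viewer is a binary viewer.  Assuming an assembled matrix A of type MATMPIAIJ:

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file written here can be read back with MatLoad() on any number of processes.
*/
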
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1521 
1522     /* copy over the A part */
1523     Aloc = (Mat_SeqAIJ*)aij->A->data;
1524     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525     row = mat->rmap->rstart;
1526     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1527     for (i=0; i<m; i++) {
1528       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1529       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1530     }
1531     aj = Aloc->j;
1532     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1533 
1534     /* copy over the B part */
1535     Aloc = (Mat_SeqAIJ*)aij->B->data;
1536     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537     row  = mat->rmap->rstart;
1538     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1539     ct   = cols;
1540     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1544     }
1545     ierr = PetscFree(ct);CHKERRQ(ierr);
1546     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     /*
1549        Everyone has to participate in the call that draws the matrix, since the graphics
1550        waits are synchronized across all processors that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1555       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1556       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1557       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1560     ierr = MatDestroy(&A);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
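/*
   Usage sketch (illustrative, not part of this source file): the ASCII "info" paths above
   are normally reached by setting a format on an ASCII viewer before calling MatView(),
   or with the corresponding -mat_view runtime options:

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Full ASCII output of a parallel matrix gathers everything onto process 0, which is why
   matrices with more than 1024 rows are rejected unless -mat_ascii_output_large is given.
*/
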
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatView_MPIAIJ"
1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1568 {
1569   PetscErrorCode ierr;
1570   PetscBool      iascii,isdraw,issocket,isbinary;
1571 
1572   PetscFunctionBegin;
1573   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1577   if (iascii || isdraw || isbinary || issocket) {
1578     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1579   } else {
1580     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSOR_MPIAIJ"
1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1588 {
1589   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1590   PetscErrorCode ierr;
1591   Vec            bb1 = 0;
1592   PetscBool      hasop;
1593 
1594   PetscFunctionBegin;
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if (flag == SOR_APPLY_UPPER) {
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1601     PetscFunctionReturn(0);
1602   }
1603 
1604   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1605     if (flag & SOR_ZERO_INITIAL_GUESS) {
1606       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607       its--;
1608     }
1609 
1610     while (its--) {
1611       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614       /* update rhs: bb1 = bb - B*x */
1615       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1616       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1617 
1618       /* local sweep */
1619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1620     }
1621   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   }  else if (flag & SOR_EISENSTAT) {
1654     Vec         xx1;
1655 
1656     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1657     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1658 
1659     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (!mat->diag) {
1662       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1663       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1664     }
1665     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1666     if (hasop) {
1667       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1668     } else {
1669       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1670     }
1671     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1672 
1673     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1674 
1675     /* local sweep */
1676     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1677     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1678     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1679   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1680 
1681   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
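/*
   Usage sketch (illustrative, not part of this source file): only the "local" sweeps are
   supported in parallel; the relaxation is applied to the processor-local diagonal block
   while the off-diagonal block B is folded into the right-hand side.  A typical direct call:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   In practice this routine is usually reached through PCSOR (-pc_type sor), which chooses
   the sweep type and iteration counts from its own options.
*/
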
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatPermute_MPIAIJ"
1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1688 {
1689   MPI_Comm       comm;
1690   PetscInt       first,local_rowsize,local_colsize;
1691   const PetscInt *rows;
1692   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1697   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1698   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1699   /* collect the global row permutation and invert it */
1700   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1701   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1702   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1703   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1704   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1705   /* get the local target indices */
1706   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1707   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1708   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1709   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1710   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1711   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1712   /* the column permutation is so much easier;
1713      make a local version of 'colp' and invert it */
1714   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1715   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1716   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1717   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1718   /* now we just get the submatrix */
1719   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1720   /* clean up */
1721   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1722   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
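/*
   Usage sketch (illustrative, not part of this source file): given index sets rowp and colp
   describing a row and a column permutation (for example obtained from a reordering package),
   the permuted matrix is created with

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);

   As the code above shows, a serial rowp is tolerated: it is promoted to a parallel index set
   with ISOnComm() before the global permutation is gathered and inverted.
*/
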
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1728 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1729 {
1730   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1731   Mat            A = mat->A,B = mat->B;
1732   PetscErrorCode ierr;
1733   PetscReal      isend[5],irecv[5];
1734 
1735   PetscFunctionBegin;
1736   info->block_size     = 1.0;
1737   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1738   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1739   isend[3] = info->memory;  isend[4] = info->mallocs;
1740   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1741   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1742   isend[3] += info->memory;  isend[4] += info->mallocs;
1743   if (flag == MAT_LOCAL) {
1744     info->nz_used      = isend[0];
1745     info->nz_allocated = isend[1];
1746     info->nz_unneeded  = isend[2];
1747     info->memory       = isend[3];
1748     info->mallocs      = isend[4];
1749   } else if (flag == MAT_GLOBAL_MAX) {
1750     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1751     info->nz_used      = irecv[0];
1752     info->nz_allocated = irecv[1];
1753     info->nz_unneeded  = irecv[2];
1754     info->memory       = irecv[3];
1755     info->mallocs      = irecv[4];
1756   } else if (flag == MAT_GLOBAL_SUM) {
1757     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1758     info->nz_used      = irecv[0];
1759     info->nz_allocated = irecv[1];
1760     info->nz_unneeded  = irecv[2];
1761     info->memory       = irecv[3];
1762     info->mallocs      = irecv[4];
1763   }
1764   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1765   info->fill_ratio_needed = 0;
1766   info->factor_mallocs    = 0;
1767 
1768   PetscFunctionReturn(0);
1769 }
1770 
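/*
   Usage sketch (illustrative, not part of this source file): callers reach this routine
   through MatGetInfo(); the flag selects whether the five counters gathered above are
   reported per process or reduced over the communicator:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
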
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatSetOption_MPIAIJ"
1773 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1774 {
1775   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   switch (op) {
1780   case MAT_NEW_NONZERO_LOCATIONS:
1781   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1782   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1783   case MAT_KEEP_NONZERO_PATTERN:
1784   case MAT_NEW_NONZERO_LOCATION_ERR:
1785   case MAT_USE_INODES:
1786   case MAT_IGNORE_ZERO_ENTRIES:
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_ROW_ORIENTED:
1791     a->roworiented = flg;
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_NEW_DIAGONALS:
1796     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1797     break;
1798   case MAT_IGNORE_OFF_PROC_ENTRIES:
1799     a->donotstash = flg;
1800     break;
1801   case MAT_SPD:
1802     A->spd_set                         = PETSC_TRUE;
1803     A->spd                             = flg;
1804     if (flg) {
1805       A->symmetric                     = PETSC_TRUE;
1806       A->structurally_symmetric        = PETSC_TRUE;
1807       A->symmetric_set                 = PETSC_TRUE;
1808       A->structurally_symmetric_set    = PETSC_TRUE;
1809     }
1810     break;
1811   case MAT_SYMMETRIC:
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_STRUCTURALLY_SYMMETRIC:
1815     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_HERMITIAN:
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_SYMMETRY_ETERNAL:
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     break;
1823   default:
1824     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1825   }
1826   PetscFunctionReturn(0);
1827 }
1828 
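/*
   Usage sketch (illustrative, not part of this source file): options are set on the parallel
   matrix and forwarded to the local diagonal (A) and off-diagonal (B) blocks as appropriate:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);

   The first option only changes the stash behaviour at the MPI level, while the second is
   forwarded to both sequential blocks.
*/
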
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatGetRow_MPIAIJ"
1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1834   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1835   PetscErrorCode ierr;
1836   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1837   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1838   PetscInt       *cmap,*idx_p;
1839 
1840   PetscFunctionBegin;
1841   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1842   mat->getrowactive = PETSC_TRUE;
1843 
1844   if (!mat->rowvalues && (idx || v)) {
1845     /*
1846         allocate enough space to hold information from the longest row.
1847     */
1848     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1849     PetscInt   max = 1,tmp;
1850     for (i=0; i<matin->rmap->n; i++) {
1851       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1852       if (max < tmp) { max = tmp; }
1853     }
1854     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1855   }
1856 
1857   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1858   lrow = row - rstart;
1859 
1860   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1861   if (!v)   {pvA = 0; pvB = 0;}
1862   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1863   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1864   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1865   nztot = nzA + nzB;
1866 
1867   cmap  = mat->garray;
1868   if (v  || idx) {
1869     if (nztot) {
1870       /* Sort by increasing column numbers, assuming A and B already sorted */
1871       PetscInt imark = -1;
1872       if (v) {
1873         *v = v_p = mat->rowvalues;
1874         for (i=0; i<nzB; i++) {
1875           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1876           else break;
1877         }
1878         imark = i;
1879         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1880         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1881       }
1882       if (idx) {
1883         *idx = idx_p = mat->rowindices;
1884         if (imark > -1) {
1885           for (i=0; i<imark; i++) {
1886             idx_p[i] = cmap[cworkB[i]];
1887           }
1888         } else {
1889           for (i=0; i<nzB; i++) {
1890             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1891             else break;
1892           }
1893           imark = i;
1894         }
1895         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1896         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1897       }
1898     } else {
1899       if (idx) *idx = 0;
1900       if (v)   *v   = 0;
1901     }
1902   }
1903   *nz = nztot;
1904   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1905   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1911 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1912 {
1913   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1914 
1915   PetscFunctionBegin;
1916   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1917   aij->getrowactive = PETSC_FALSE;
1918   PetscFunctionReturn(0);
1919 }
1920 
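/*
   Usage sketch (illustrative, not part of this source file): each process may only query the
   rows it owns, and every MatGetRow() must be paired with a MatRestoreRow() before the next
   row is requested, since the implementation keeps a single merged work buffer per matrix:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols, vals ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
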
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatNorm_MPIAIJ"
1923 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1924 {
1925   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1926   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1927   PetscErrorCode ierr;
1928   PetscInt       i,j,cstart = mat->cmap->rstart;
1929   PetscReal      sum = 0.0;
1930   MatScalar      *v;
1931 
1932   PetscFunctionBegin;
1933   if (aij->size == 1) {
1934     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1935   } else {
1936     if (type == NORM_FROBENIUS) {
1937       v = amat->a;
1938       for (i=0; i<amat->nz; i++) {
1939 #if defined(PETSC_USE_COMPLEX)
1940         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1941 #else
1942         sum += (*v)*(*v); v++;
1943 #endif
1944       }
1945       v = bmat->a;
1946       for (i=0; i<bmat->nz; i++) {
1947 #if defined(PETSC_USE_COMPLEX)
1948         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1949 #else
1950         sum += (*v)*(*v); v++;
1951 #endif
1952       }
1953       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955     } else if (type == NORM_1) { /* max column norm */
1956       PetscReal *tmp,*tmp2;
1957       PetscInt  *jj,*garray = aij->garray;
1958       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1959       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1960       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976     } else if (type == NORM_INFINITY) { /* max row norm */
1977       PetscReal ntemp = 0.0;
1978       for (j=0; j<aij->A->rmap->n; j++) {
1979         v = amat->a + amat->i[j];
1980         sum = 0.0;
1981         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1982           sum += PetscAbsScalar(*v); v++;
1983         }
1984         v = bmat->a + bmat->i[j];
1985         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1986           sum += PetscAbsScalar(*v); v++;
1987         }
1988         if (sum > ntemp) ntemp = sum;
1989       }
1990       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1991     } else {
1992       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1993     }
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
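/*
   Usage sketch (illustrative, not part of this source file): the Frobenius, one, and infinity
   norms computed above are requested through MatNorm(); the two norm is not available for
   this matrix type:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);          largest column sum
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);   largest row sum
*/
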
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatTranspose_MPIAIJ"
2000 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2001 {
2002   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2003   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2004   PetscErrorCode ierr;
2005   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2006   PetscInt       cstart=A->cmap->rstart,ncol;
2007   Mat            B;
2008   MatScalar      *array;
2009 
2010   PetscFunctionBegin;
2011   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2012 
2013   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2014   ai = Aloc->i; aj = Aloc->j;
2015   bi = Bloc->i; bj = Bloc->j;
2016   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2017     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2018     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2019     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++){
2021       d_nnz[aj[i]] ++;
2022       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2023     }
2024 
2025     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2026     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2027     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2028     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2029     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2030   } else {
2031     B = *matout;
2032   }
2033 
2034   /* copy over the A part */
2035   array = Aloc->a;
2036   row = A->rmap->rstart;
2037   for (i=0; i<ma; i++) {
2038     ncol = ai[i+1]-ai[i];
2039     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2040     row++; array += ncol; aj += ncol;
2041   }
2042   aj = Aloc->j;
2043   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2044 
2045   /* copy over the B part */
2046   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2047   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2048   array = Bloc->a;
2049   row = A->rmap->rstart;
2050   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2051   cols_tmp = cols;
2052   for (i=0; i<mb; i++) {
2053     ncol = bi[i+1]-bi[i];
2054     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2055     row++; array += ncol; cols_tmp += ncol;
2056   }
2057   ierr = PetscFree(cols);CHKERRQ(ierr);
2058 
2059   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2062     *matout = B;
2063   } else {
2064     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
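/*
   Usage sketch (illustrative, not part of this source file): an out-of-place transpose
   creates a new matrix, while the in-place form (only legal for square matrices, as checked
   above) reuses the header of A:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place, A becomes A^T
*/
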
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2071 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2072 {
2073   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2074   Mat            a = aij->A,b = aij->B;
2075   PetscErrorCode ierr;
2076   PetscInt       s1,s2,s3;
2077 
2078   PetscFunctionBegin;
2079   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2080   if (rr) {
2081     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2082     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2083     /* Overlap communication with computation. */
2084     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2085   }
2086   if (ll) {
2087     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2088     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2089     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2090   }
2091   /* scale  the diagonal block */
2092   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2093 
2094   if (rr) {
2095     /* Do a scatter end and then right scale the off-diagonal block */
2096     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2097     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2098   }
2099 
2100   PetscFunctionReturn(0);
2101 }
2102 
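/*
   Usage sketch (illustrative, not part of this source file): the left and right scaling
   vectors must conform to the local row and column sizes respectively; either may be
   PETSC_NULL to skip that side:

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);        r conforms to columns, l to rows
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);    A becomes diag(l) A diag(r)
*/
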
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2105 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2106 {
2107   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2112   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2113   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2114   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2120 {
2121   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2122   PetscErrorCode ierr;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatEqual_MPIAIJ"
2131 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2132 {
2133   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2134   Mat            a,b,c,d;
2135   PetscBool      flg;
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   a = matA->A; b = matA->B;
2140   c = matB->A; d = matB->B;
2141 
2142   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2143   if (flg) {
2144     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2145   }
2146   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
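/*
   Usage sketch (illustrative, not part of this source file): the comparison is collective;
   each process compares its local blocks and the results are combined with a logical AND:

     PetscBool equal;
     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
     if (!equal) { ierr = PetscPrintf(PETSC_COMM_WORLD,"matrices differ\n");CHKERRQ(ierr); }
*/
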
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatCopy_MPIAIJ"
2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2153 {
2154   PetscErrorCode ierr;
2155   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2156   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2157 
2158   PetscFunctionBegin;
2159   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2160   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2161     /* because of the column compression in the off-processor part of the matrix a->B,
2162        the number of columns in a->B and b->B may be different, hence we cannot call
2163        the MatCopy() directly on the two parts. If need be, we can provide a more
2164        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2165        then copying the submatrices */
2166     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2167   } else {
2168     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2169     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2170   }
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2176 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2177 {
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPY_MPIAIJ"
2187 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x = (Mat_SeqAIJ *)xx->A->data;
2199     y = (Mat_SeqAIJ *)yy->A->data;
2200     bnz = PetscBLASIntCast(x->nz);
2201     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2202     x = (Mat_SeqAIJ *)xx->B->data;
2203     y = (Mat_SeqAIJ *)yy->B->data;
2204     bnz = PetscBLASIntCast(x->nz);
2205     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2206   } else if (str == SUBSET_NONZERO_PATTERN) {
2207     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2208 
2209     x = (Mat_SeqAIJ *)xx->B->data;
2210     y = (Mat_SeqAIJ *)yy->B->data;
2211     if (y->xtoy && y->XtoY != xx->B) {
2212       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2213       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2214     }
2215     if (!y->xtoy) { /* get xtoy */
2216       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2217       y->XtoY = xx->B;
2218       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2219     }
2220     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2221   } else {
2222     Mat B;
2223     PetscInt *nnz_d,*nnz_o;
2224     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2225     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2226     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2227     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2228     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2229     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2230     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2232     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2233     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2234     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2235     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2237   }
2238   PetscFunctionReturn(0);
2239 }
2240 
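/*
   Usage sketch (illustrative, not part of this source file): the cost of Y <- Y + a*X depends
   strongly on the MatStructure argument; SAME_NONZERO_PATTERN reduces to two BLAS axpy calls
   on the stored values, while DIFFERENT_NONZERO_PATTERN rebuilds Y with a fresh preallocation:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
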
2241 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatConjugate_MPIAIJ"
2245 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2246 {
2247 #if defined(PETSC_USE_COMPLEX)
2248   PetscErrorCode ierr;
2249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2253   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2254 #else
2255   PetscFunctionBegin;
2256 #endif
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatRealPart_MPIAIJ"
2262 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2263 {
2264   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2265   PetscErrorCode ierr;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2269   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2275 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2276 {
2277   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2282   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #ifdef PETSC_HAVE_PBGL
2287 
2288 #include <boost/parallel/mpi/bsp_process_group.hpp>
2289 #include <boost/graph/distributed/ilu_default_graph.hpp>
2290 #include <boost/graph/distributed/ilu_0_block.hpp>
2291 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2292 #include <boost/graph/distributed/petsc/interface.hpp>
2293 #include <boost/multi_array.hpp>
2294 #include <boost/parallel/distributed_property_map.hpp>
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2298 /*
2299   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2300 */
2301 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2302 {
2303   namespace petsc = boost::distributed::petsc;
2304 
2305   namespace graph_dist = boost::graph::distributed;
2306   using boost::graph::distributed::ilu_default::process_group_type;
2307   using boost::graph::ilu_permuted;
2308 
2309   PetscBool       row_identity, col_identity;
2310   PetscContainer  c;
2311   PetscInt        m, n, M, N;
2312   PetscErrorCode  ierr;
2313 
2314   PetscFunctionBegin;
2315   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2316   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2317   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2318   if (!row_identity || !col_identity) {
2319     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2320   }
2321 
2322   process_group_type pg;
2323   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2324   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2325   lgraph_type&   level_graph = *lgraph_p;
2326   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2327 
2328   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2329   ilu_permuted(level_graph);
2330 
2331   /* put together the new matrix */
2332   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2333   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2334   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2335   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2336   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2337   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339 
2340   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2341   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2342   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2343   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2349 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2350 {
2351   PetscFunctionBegin;
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatSolve_MPIAIJ"
2357 /*
2358   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2359 */
2360 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2361 {
2362   namespace graph_dist = boost::graph::distributed;
2363 
2364   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2365   lgraph_type*   lgraph_p;
2366   PetscContainer c;
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2371   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2372   ierr = VecCopy(b, x);CHKERRQ(ierr);
2373 
2374   PetscScalar* array_x;
2375   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2376   PetscInt sx;
2377   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2378 
2379   PetscScalar* array_b;
2380   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2381   PetscInt sb;
2382   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2383 
2384   lgraph_type&   level_graph = *lgraph_p;
2385   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2386 
2387   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2388   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2389                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2390 
2391   typedef boost::iterator_property_map<array_ref_type::iterator,
2392                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2393   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2394                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2395 
2396   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2397 
2398   PetscFunctionReturn(0);
2399 }
2400 #endif
2401 
2402 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2403   PetscInt       nzlocal,nsends,nrecvs;
2404   PetscMPIInt    *send_rank,*recv_rank;
2405   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2406   PetscScalar    *sbuf_a,**rbuf_a;
2407   PetscErrorCode (*Destroy)(Mat);
2408 } Mat_Redundant;
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2412 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2413 {
2414   PetscErrorCode       ierr;
2415   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2416   PetscInt             i;
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2420   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2421   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2422   for (i=0; i<redund->nrecvs; i++){
2423     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2424     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2425   }
2426   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2427   ierr = PetscFree(redund);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatDestroy_MatRedundant"
2433 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2434 {
2435   PetscErrorCode  ierr;
2436   PetscContainer  container;
2437   Mat_Redundant   *redund=PETSC_NULL;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2441   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2442   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2443   A->ops->destroy = redund->Destroy;
2444   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2445   if (A->ops->destroy) {
2446     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2447   }
2448   PetscFunctionReturn(0);
2449 }
2450 
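/*
   Sketch of the container pattern used below (illustrative, not part of this source file):
   the communication buffers needed to refill a redundant matrix are kept alive by attaching
   a Mat_Redundant struct to the created matrix through a PetscContainer, so that a later
   MAT_REUSE_MATRIX call can find them again:

     PetscContainer container;
     ierr = PetscContainerCreate(comm,&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

   MatDestroy_MatRedundant() above then retrieves the container with PetscObjectQuery() and
   restores the original destroy routine before the matrix itself is freed.
*/
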
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2453 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2454 {
2455   PetscMPIInt    rank,size;
2456   MPI_Comm       comm=((PetscObject)mat)->comm;
2457   PetscErrorCode ierr;
2458   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2459   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2460   PetscInt       *rowrange=mat->rmap->range;
2461   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2462   Mat            A=aij->A,B=aij->B,C=*matredundant;
2463   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2464   PetscScalar    *sbuf_a;
2465   PetscInt       nzlocal=a->nz+b->nz;
2466   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2467   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2468   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2469   MatScalar      *aworkA,*aworkB;
2470   PetscScalar    *vals;
2471   PetscMPIInt    tag1,tag2,tag3,imdex;
2472   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2473                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2474   MPI_Status     recv_status,*send_status;
2475   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2476   PetscInt       **rbuf_j=PETSC_NULL;
2477   PetscScalar    **rbuf_a=PETSC_NULL;
2478   Mat_Redundant  *redund=PETSC_NULL;
2479   PetscContainer container;
2480 
2481   PetscFunctionBegin;
2482   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2483   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2484 
2485   if (reuse == MAT_REUSE_MATRIX) {
2486     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2487     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2488     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2489     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2490     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2491     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2492     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2493     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2494 
2495     nsends    = redund->nsends;
2496     nrecvs    = redund->nrecvs;
2497     send_rank = redund->send_rank;
2498     recv_rank = redund->recv_rank;
2499     sbuf_nz   = redund->sbuf_nz;
2500     rbuf_nz   = redund->rbuf_nz;
2501     sbuf_j    = redund->sbuf_j;
2502     sbuf_a    = redund->sbuf_a;
2503     rbuf_j    = redund->rbuf_j;
2504     rbuf_a    = redund->rbuf_a;
2505   }
2506 
2507   if (reuse == MAT_INITIAL_MATRIX){
2508     PetscMPIInt  subrank,subsize;
2509     PetscInt     nleftover,np_subcomm;
2510     /* get the destination processors' id send_rank, nsends and nrecvs */
2511     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2512     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2513     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2514     np_subcomm = size/nsubcomm;
2515     nleftover  = size - nsubcomm*np_subcomm;
2516     nsends = 0; nrecvs = 0;
2517     for (i=0; i<size; i++){ /* i=rank*/
2518       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2519         send_rank[nsends] = i; nsends++;
2520         recv_rank[nrecvs++] = i;
2521       }
2522     }
2523     if (rank >= size - nleftover){/* this proc is a leftover processor */
2524       i = size-nleftover-1;
2525       j = 0;
2526       while (j < nsubcomm - nleftover){
2527         send_rank[nsends++] = i;
2528         i--; j++;
2529       }
2530     }
2531 
2532     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2533       for (i=0; i<nleftover; i++){
2534         recv_rank[nrecvs++] = size-nleftover+i;
2535       }
2536     }
2537 
2538     /* allocate sbuf_j, sbuf_a */
2539     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2540     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2541     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2542   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2543 
2544   /* copy mat's local entries into the buffers */
2545   if (reuse == MAT_INITIAL_MATRIX){
2546     rownz_max = 0;
2547     rptr = sbuf_j;
2548     cols = sbuf_j + rend-rstart + 1;
2549     vals = sbuf_a;
2550     rptr[0] = 0;
2551     for (i=0; i<rend-rstart; i++){
2552       row = i + rstart;
2553       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2554       ncols  = nzA + nzB;
2555       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2556       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2557       /* load the column indices for this row into cols */
2558       lwrite = 0;
2559       for (l=0; l<nzB; l++) {
2560         if ((ctmp = bmap[cworkB[l]]) < cstart){
2561           vals[lwrite]   = aworkB[l];
2562           cols[lwrite++] = ctmp;
2563         }
2564       }
2565       for (l=0; l<nzA; l++){
2566         vals[lwrite]   = aworkA[l];
2567         cols[lwrite++] = cstart + cworkA[l];
2568       }
2569       for (l=0; l<nzB; l++) {
2570         if ((ctmp = bmap[cworkB[l]]) >= cend){
2571           vals[lwrite]   = aworkB[l];
2572           cols[lwrite++] = ctmp;
2573         }
2574       }
2575       vals += ncols;
2576       cols += ncols;
2577       rptr[i+1] = rptr[i] + ncols;
2578       if (rownz_max < ncols) rownz_max = ncols;
2579     }
2580     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2581   } else { /* only copy matrix values into sbuf_a */
2582     rptr = sbuf_j;
2583     vals = sbuf_a;
2584     rptr[0] = 0;
2585     for (i=0; i<rend-rstart; i++){
2586       row = i + rstart;
2587       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2588       ncols  = nzA + nzB;
2589       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2590       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2591       lwrite = 0;
2592       for (l=0; l<nzB; l++) {
2593         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2594       }
2595       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2596       for (l=0; l<nzB; l++) {
2597         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2598       }
2599       vals += ncols;
2600       rptr[i+1] = rptr[i] + ncols;
2601     }
2602   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2603 
2604   /* send nzlocal to others, and recv other's nzlocal */
2605   /*--------------------------------------------------*/
2606   if (reuse == MAT_INITIAL_MATRIX){
2607     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2608     s_waits2 = s_waits3 + nsends;
2609     s_waits1 = s_waits2 + nsends;
2610     r_waits1 = s_waits1 + nsends;
2611     r_waits2 = r_waits1 + nrecvs;
2612     r_waits3 = r_waits2 + nrecvs;
2613   } else {
2614     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2615     r_waits3 = s_waits3 + nsends;
2616   }
2617 
2618   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2619   if (reuse == MAT_INITIAL_MATRIX){
2620     /* get new tags to keep the communication clean */
2621     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2622     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2623     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2624 
2625     /* post receives of other's nzlocal */
2626     for (i=0; i<nrecvs; i++){
2627       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2628     }
2629     /* send nzlocal to others */
2630     for (i=0; i<nsends; i++){
2631       sbuf_nz[i] = nzlocal;
2632       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2633     }
2634     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2635     count = nrecvs;
2636     while (count) {
2637       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2638       recv_rank[imdex] = recv_status.MPI_SOURCE;
2639       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2640       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2641 
2642       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2643       rbuf_nz[imdex] += i + 2;
2644       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2645       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2646       count--;
2647     }
2648     /* wait on sends of nzlocal */
2649     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2650     /* send mat->i,j to others, and recv mat->i,j from others */
2651     /*------------------------------------------------*/
2652     for (i=0; i<nsends; i++){
2653       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2654       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2655     }
2656     /* wait on receives of mat->i,j */
2657     /*------------------------------*/
2658     count = nrecvs;
2659     while (count) {
2660       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2661       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2662       count--;
2663     }
2664     /* wait on sends of mat->i,j */
2665     /*---------------------------*/
2666     if (nsends) {
2667       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2668     }
2669   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2670 
2671   /* post receives, send and receive mat->a */
2672   /*----------------------------------------*/
2673   for (imdex=0; imdex<nrecvs; imdex++) {
2674     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2675   }
2676   for (i=0; i<nsends; i++){
2677     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2678   }
2679   count = nrecvs;
2680   while (count) {
2681     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2682     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2683     count--;
2684   }
2685   if (nsends) {
2686     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2687   }
2688 
2689   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2690 
2691   /* create redundant matrix */
2692   /*-------------------------*/
2693   if (reuse == MAT_INITIAL_MATRIX){
2694     /* compute rownz_max for preallocation */
2695     for (imdex=0; imdex<nrecvs; imdex++){
2696       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2697       rptr = rbuf_j[imdex];
2698       for (i=0; i<j; i++){
2699         ncols = rptr[i+1] - rptr[i];
2700         if (rownz_max < ncols) rownz_max = ncols;
2701       }
2702     }
2703 
2704     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2705     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2706     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2707     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2708     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2709   } else {
2710     C = *matredundant;
2711   }
2712 
2713   /* insert local matrix entries */
2714   rptr = sbuf_j;
2715   cols = sbuf_j + rend-rstart + 1;
2716   vals = sbuf_a;
2717   for (i=0; i<rend-rstart; i++){
2718     row   = i + rstart;
2719     ncols = rptr[i+1] - rptr[i];
2720     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2721     vals += ncols;
2722     cols += ncols;
2723   }
2724   /* insert received matrix entries */
2725   for (imdex=0; imdex<nrecvs; imdex++){
2726     rstart = rowrange[recv_rank[imdex]];
2727     rend   = rowrange[recv_rank[imdex]+1];
2728     rptr = rbuf_j[imdex];
2729     cols = rbuf_j[imdex] + rend-rstart + 1;
2730     vals = rbuf_a[imdex];
2731     for (i=0; i<rend-rstart; i++){
2732       row   = i + rstart;
2733       ncols = rptr[i+1] - rptr[i];
2734       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2735       vals += ncols;
2736       cols += ncols;
2737     }
2738   }
2739   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2740   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2742   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2743   if (reuse == MAT_INITIAL_MATRIX) {
2744     PetscContainer container;
2745     *matredundant = C;
2746     /* create a supporting struct and attach it to C for reuse */
2747     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2748     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2749     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2750     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2751     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2752     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2753 
2754     redund->nzlocal = nzlocal;
2755     redund->nsends  = nsends;
2756     redund->nrecvs  = nrecvs;
2757     redund->send_rank = send_rank;
2758     redund->recv_rank = recv_rank;
2759     redund->sbuf_nz = sbuf_nz;
2760     redund->rbuf_nz = rbuf_nz;
2761     redund->sbuf_j  = sbuf_j;
2762     redund->sbuf_a  = sbuf_a;
2763     redund->rbuf_j  = rbuf_j;
2764     redund->rbuf_a  = rbuf_a;
2765 
2766     redund->Destroy = C->ops->destroy;
2767     C->ops->destroy = MatDestroy_MatRedundant;
2768   }
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2774 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2775 {
2776   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2777   PetscErrorCode ierr;
2778   PetscInt       i,*idxb = 0;
2779   PetscScalar    *va,*vb;
2780   Vec            vtmp;
2781 
2782   PetscFunctionBegin;
2783   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2784   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2785   if (idx) {
2786     for (i=0; i<A->rmap->n; i++) {
2787       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2788     }
2789   }
2790 
2791   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2792   if (idx) {
2793     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2794   }
2795   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2796   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2797 
2798   for (i=0; i<A->rmap->n; i++){
2799     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2800       va[i] = vb[i];
2801       if (idx) idx[i] = a->garray[idxb[i]];
2802     }
2803   }
2804 
2805   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2806   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2807   ierr = PetscFree(idxb);CHKERRQ(ierr);
2808   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 #undef __FUNCT__
2813 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2814 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2815 {
2816   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2817   PetscErrorCode ierr;
2818   PetscInt       i,*idxb = 0;
2819   PetscScalar    *va,*vb;
2820   Vec            vtmp;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2824   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2825   if (idx) {
2826     for (i=0; i<A->rmap->n; i++) {
2827       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2828     }
2829   }
2830 
2831   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2832   if (idx) {
2833     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2834   }
2835   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2836   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2837 
2838   for (i=0; i<A->rmap->n; i++){
2839     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2840       va[i] = vb[i];
2841       if (idx) idx[i] = a->garray[idxb[i]];
2842     }
2843   }
2844 
2845   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2846   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2847   ierr = PetscFree(idxb);CHKERRQ(ierr);
2848   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2854 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2855 {
2856   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2857   PetscInt       n      = A->rmap->n;
2858   PetscInt       cstart = A->cmap->rstart;
2859   PetscInt      *cmap   = mat->garray;
2860   PetscInt      *diagIdx, *offdiagIdx;
2861   Vec            diagV, offdiagV;
2862   PetscScalar   *a, *diagA, *offdiagA;
2863   PetscInt       r;
2864   PetscErrorCode ierr;
2865 
2866   PetscFunctionBegin;
2867   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2868   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2869   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2870   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2871   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2872   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2873   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2874   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2875   for(r = 0; r < n; ++r) {
2876     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2877       a[r]   = diagA[r];
2878       idx[r] = cstart + diagIdx[r];
2879     } else {
2880       a[r]   = offdiagA[r];
2881       idx[r] = cmap[offdiagIdx[r]];
2882     }
2883   }
2884   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2885   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2887   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2888   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2889   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2895 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2896 {
2897   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2898   PetscInt       n      = A->rmap->n;
2899   PetscInt       cstart = A->cmap->rstart;
2900   PetscInt      *cmap   = mat->garray;
2901   PetscInt      *diagIdx, *offdiagIdx;
2902   Vec            diagV, offdiagV;
2903   PetscScalar   *a, *diagA, *offdiagA;
2904   PetscInt       r;
2905   PetscErrorCode ierr;
2906 
2907   PetscFunctionBegin;
2908   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2909   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2910   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2911   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2912   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2913   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2914   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2915   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2916   for(r = 0; r < n; ++r) {
2917     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2918       a[r]   = diagA[r];
2919       idx[r] = cstart + diagIdx[r];
2920     } else {
2921       a[r]   = offdiagA[r];
2922       idx[r] = cmap[offdiagIdx[r]];
2923     }
2924   }
2925   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2926   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2928   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2929   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2930   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2936 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2937 {
2938   PetscErrorCode ierr;
2939   Mat            *dummy;
2940 
2941   PetscFunctionBegin;
2942   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2943   *newmat = *dummy;
2944   ierr = PetscFree(dummy);CHKERRQ(ierr);
2945   PetscFunctionReturn(0);
2946 }
2947 
2948 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2952 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2953 {
2954   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 
2963 /* -------------------------------------------------------------------*/
2964 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2965        MatGetRow_MPIAIJ,
2966        MatRestoreRow_MPIAIJ,
2967        MatMult_MPIAIJ,
2968 /* 4*/ MatMultAdd_MPIAIJ,
2969        MatMultTranspose_MPIAIJ,
2970        MatMultTransposeAdd_MPIAIJ,
2971 #ifdef PETSC_HAVE_PBGL
2972        MatSolve_MPIAIJ,
2973 #else
2974        0,
2975 #endif
2976        0,
2977        0,
2978 /*10*/ 0,
2979        0,
2980        0,
2981        MatSOR_MPIAIJ,
2982        MatTranspose_MPIAIJ,
2983 /*15*/ MatGetInfo_MPIAIJ,
2984        MatEqual_MPIAIJ,
2985        MatGetDiagonal_MPIAIJ,
2986        MatDiagonalScale_MPIAIJ,
2987        MatNorm_MPIAIJ,
2988 /*20*/ MatAssemblyBegin_MPIAIJ,
2989        MatAssemblyEnd_MPIAIJ,
2990        MatSetOption_MPIAIJ,
2991        MatZeroEntries_MPIAIJ,
2992 /*24*/ MatZeroRows_MPIAIJ,
2993        0,
2994 #ifdef PETSC_HAVE_PBGL
2995        0,
2996 #else
2997        0,
2998 #endif
2999        0,
3000        0,
3001 /*29*/ MatSetUpPreallocation_MPIAIJ,
3002 #ifdef PETSC_HAVE_PBGL
3003        0,
3004 #else
3005        0,
3006 #endif
3007        0,
3008        0,
3009        0,
3010 /*34*/ MatDuplicate_MPIAIJ,
3011        0,
3012        0,
3013        0,
3014        0,
3015 /*39*/ MatAXPY_MPIAIJ,
3016        MatGetSubMatrices_MPIAIJ,
3017        MatIncreaseOverlap_MPIAIJ,
3018        MatGetValues_MPIAIJ,
3019        MatCopy_MPIAIJ,
3020 /*44*/ MatGetRowMax_MPIAIJ,
3021        MatScale_MPIAIJ,
3022        0,
3023        0,
3024        MatZeroRowsColumns_MPIAIJ,
3025 /*49*/ MatSetBlockSize_MPIAIJ,
3026        0,
3027        0,
3028        0,
3029        0,
3030 /*54*/ MatFDColoringCreate_MPIAIJ,
3031        0,
3032        MatSetUnfactored_MPIAIJ,
3033        MatPermute_MPIAIJ,
3034        0,
3035 /*59*/ MatGetSubMatrix_MPIAIJ,
3036        MatDestroy_MPIAIJ,
3037        MatView_MPIAIJ,
3038        0,
3039        0,
3040 /*64*/ 0,
3041        0,
3042        0,
3043        0,
3044        0,
3045 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3046        MatGetRowMinAbs_MPIAIJ,
3047        0,
3048        MatSetColoring_MPIAIJ,
3049 #if defined(PETSC_HAVE_ADIC)
3050        MatSetValuesAdic_MPIAIJ,
3051 #else
3052        0,
3053 #endif
3054        MatSetValuesAdifor_MPIAIJ,
3055 /*75*/ MatFDColoringApply_AIJ,
3056        0,
3057        0,
3058        0,
3059        0,
3060 /*80*/ 0,
3061        0,
3062        0,
3063 /*83*/ MatLoad_MPIAIJ,
3064        0,
3065        0,
3066        0,
3067        0,
3068        0,
3069 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3070        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3071        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3072        MatPtAP_Basic,
3073        MatPtAPSymbolic_MPIAIJ,
3074 /*94*/ MatPtAPNumeric_MPIAIJ,
3075        0,
3076        0,
3077        0,
3078        0,
3079 /*99*/ 0,
3080        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3081        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3082        MatConjugate_MPIAIJ,
3083        0,
3084 /*104*/MatSetValuesRow_MPIAIJ,
3085        MatRealPart_MPIAIJ,
3086        MatImaginaryPart_MPIAIJ,
3087        0,
3088        0,
3089 /*109*/0,
3090        MatGetRedundantMatrix_MPIAIJ,
3091        MatGetRowMin_MPIAIJ,
3092        0,
3093        0,
3094 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3095        0,
3096        0,
3097        0,
3098        0,
3099 /*119*/0,
3100        0,
3101        0,
3102        0,
3103        MatGetMultiProcBlock_MPIAIJ,
3104 /*124*/MatFindNonZeroRows_MPIAIJ,
3105        MatGetColumnNorms_MPIAIJ,
3106        MatInvertBlockDiagonal_MPIAIJ,
3107        0,
3108        MatGetSubMatricesParallel_MPIAIJ,
3109 /*129*/0,
3110        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3111        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3112        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3113        0,
3114 /*134*/0,
3115        0,
3116        0,
3117        0,
3118        0
3119 };
3120 
3121 /* ----------------------------------------------------------------------------------------*/
3122 
3123 EXTERN_C_BEGIN
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3126 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3127 {
3128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3129   PetscErrorCode ierr;
3130 
3131   PetscFunctionBegin;
3132   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3133   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3134   PetscFunctionReturn(0);
3135 }
3136 EXTERN_C_END
3137 
3138 EXTERN_C_BEGIN
3139 #undef __FUNCT__
3140 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3141 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3142 {
3143   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3144   PetscErrorCode ierr;
3145 
3146   PetscFunctionBegin;
3147   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3148   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3149   PetscFunctionReturn(0);
3150 }
3151 EXTERN_C_END
3152 
3153 EXTERN_C_BEGIN
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3156 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3157 {
3158   Mat_MPIAIJ     *b;
3159   PetscErrorCode ierr;
3160   PetscInt       i;
3161 
3162   PetscFunctionBegin;
3163   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3164   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3165   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3166   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3167 
3168   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3169   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3170   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3171   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3172   if (d_nnz) {
3173     for (i=0; i<B->rmap->n; i++) {
3174       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3175     }
3176   }
3177   if (o_nnz) {
3178     for (i=0; i<B->rmap->n; i++) {
3179       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3180     }
3181   }
3182   b = (Mat_MPIAIJ*)B->data;
3183 
3184   if (!B->preallocated) {
3185     /* Explicitly create 2 MATSEQAIJ matrices. */
3186     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3187     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3188     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3189     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3190     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3191     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3192     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3193     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3194   }
3195 
3196   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3197   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3198   B->preallocated = PETSC_TRUE;
3199   PetscFunctionReturn(0);
3200 }
3201 EXTERN_C_END
3202 
3203 #undef __FUNCT__
3204 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3205 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3206 {
3207   Mat            mat;
3208   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3209   PetscErrorCode ierr;
3210 
3211   PetscFunctionBegin;
3212   *newmat       = 0;
3213   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3214   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3215   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3216   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3217   a    = (Mat_MPIAIJ*)mat->data;
3218 
3219   mat->factortype    = matin->factortype;
3220   mat->rmap->bs      = matin->rmap->bs;
3221   mat->assembled    = PETSC_TRUE;
3222   mat->insertmode   = NOT_SET_VALUES;
3223   mat->preallocated = PETSC_TRUE;
3224 
3225   a->size           = oldmat->size;
3226   a->rank           = oldmat->rank;
3227   a->donotstash     = oldmat->donotstash;
3228   a->roworiented    = oldmat->roworiented;
3229   a->rowindices     = 0;
3230   a->rowvalues      = 0;
3231   a->getrowactive   = PETSC_FALSE;
3232 
3233   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3234   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3235 
3236   if (oldmat->colmap) {
3237 #if defined (PETSC_USE_CTABLE)
3238     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3239 #else
3240     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3241     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3242     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3243 #endif
3244   } else a->colmap = 0;
3245   if (oldmat->garray) {
3246     PetscInt len;
3247     len  = oldmat->B->cmap->n;
3248     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3249     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3250     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3251   } else a->garray = 0;
3252 
3253   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3254   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3255   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3256   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3257   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3258   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3259   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3260   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3261   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3262   *newmat = mat;
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatLoad_MPIAIJ"
3270 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3271 {
3272   PetscScalar    *vals,*svals;
3273   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3274   PetscErrorCode ierr;
3275   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3276   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3277   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3278   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3279   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3280   int            fd;
3281 
3282   PetscFunctionBegin;
3283   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3284   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3285   if (!rank) {
3286     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3287     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3288     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3289   }
3290 
3291   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3292 
3293   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3294   M = header[1]; N = header[2];
3295   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3296   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3297   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3298 
3299   /* If global sizes are set, check if they are consistent with that given in the file */
3300   if (sizesset) {
3301     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3302   }
3303   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3304   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3305 
3306   /* determine ownership of all rows */
3307   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3308   else m = newMat->rmap->n; /* Set by user */
3309 
3310   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3311   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3312 
3313   /* First process needs enough room for process with most rows */
3314   if (!rank) {
3315     mmax       = rowners[1];
3316     for (i=2; i<=size; i++) {
3317       mmax = PetscMax(mmax,rowners[i]);
3318     }
3319   } else mmax = m;
3320 
3321   rowners[0] = 0;
3322   for (i=2; i<=size; i++) {
3323     rowners[i] += rowners[i-1];
3324   }
3325   rstart = rowners[rank];
3326   rend   = rowners[rank+1];
3327 
3328   /* distribute row lengths to all processors */
3329   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3330   if (!rank) {
3331     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3332     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3333     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3334     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3335     for (j=0; j<m; j++) {
3336       procsnz[0] += ourlens[j];
3337     }
3338     for (i=1; i<size; i++) {
3339       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3340       /* calculate the number of nonzeros on each processor */
3341       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3342         procsnz[i] += rowlengths[j];
3343       }
3344       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3345     }
3346     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3347   } else {
3348     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3349   }
3350 
3351   if (!rank) {
3352     /* determine max buffer needed and allocate it */
3353     maxnz = 0;
3354     for (i=0; i<size; i++) {
3355       maxnz = PetscMax(maxnz,procsnz[i]);
3356     }
3357     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3358 
3359     /* read in my part of the matrix column indices  */
3360     nz   = procsnz[0];
3361     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3362     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3363 
3364     /* read in everyone else's and ship it off */
3365     for (i=1; i<size; i++) {
3366       nz     = procsnz[i];
3367       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3368       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3369     }
3370     ierr = PetscFree(cols);CHKERRQ(ierr);
3371   } else {
3372     /* determine buffer space needed for message */
3373     nz = 0;
3374     for (i=0; i<m; i++) {
3375       nz += ourlens[i];
3376     }
3377     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3378 
3379     /* receive message of column indices*/
3380     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3381   }
3382 
3383   /* determine column ownership if matrix is not square */
3384   if (N != M) {
3385     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3386     else n = newMat->cmap->n;
3387     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3388     cstart = cend - n;
3389   } else {
3390     cstart = rstart;
3391     cend   = rend;
3392     n      = cend - cstart;
3393   }
3394 
3395   /* loop over local rows, determining number of off diagonal entries */
3396   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3397   jj = 0;
3398   for (i=0; i<m; i++) {
3399     for (j=0; j<ourlens[i]; j++) {
3400       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3401       jj++;
3402     }
3403   }
3404 
3405   for (i=0; i<m; i++) {
3406     ourlens[i] -= offlens[i];
3407   }
3408   if (!sizesset) {
3409     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3410   }
3411   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3412 
3413   for (i=0; i<m; i++) {
3414     ourlens[i] += offlens[i];
3415   }
3416 
3417   if (!rank) {
3418     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3419 
3420     /* read in my part of the matrix numerical values  */
3421     nz   = procsnz[0];
3422     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3423 
3424     /* insert into matrix */
3425     jj      = rstart;
3426     smycols = mycols;
3427     svals   = vals;
3428     for (i=0; i<m; i++) {
3429       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3430       smycols += ourlens[i];
3431       svals   += ourlens[i];
3432       jj++;
3433     }
3434 
3435     /* read in other processors and ship out */
3436     for (i=1; i<size; i++) {
3437       nz     = procsnz[i];
3438       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3439       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3440     }
3441     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3442   } else {
3443     /* receive numeric values */
3444     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3445 
3446     /* receive message of values*/
3447     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3448 
3449     /* insert into matrix */
3450     jj      = rstart;
3451     smycols = mycols;
3452     svals   = vals;
3453     for (i=0; i<m; i++) {
3454       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3455       smycols += ourlens[i];
3456       svals   += ourlens[i];
3457       jj++;
3458     }
3459   }
3460   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3461   ierr = PetscFree(vals);CHKERRQ(ierr);
3462   ierr = PetscFree(mycols);CHKERRQ(ierr);
3463   ierr = PetscFree(rowners);CHKERRQ(ierr);
3464 
3465   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3466   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3467   PetscFunctionReturn(0);
3468 }
3469 
3470 #undef __FUNCT__
3471 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3472 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3473 {
3474   PetscErrorCode ierr;
3475   IS             iscol_local;
3476   PetscInt       csize;
3477 
3478   PetscFunctionBegin;
3479   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3480   if (call == MAT_REUSE_MATRIX) {
3481     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3482     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3483   } else {
3484     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3485   }
3486   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3487   if (call == MAT_INITIAL_MATRIX) {
3488     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3489     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3490   }
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3496 /*
3497     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ copy
3498   on each process, and then the final result formed by concatenating the local matrices.
3499   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3500 
3501   Note: This requires a sequential iscol with all indices.
3502 */
3503 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3504 {
3505   PetscErrorCode ierr;
3506   PetscMPIInt    rank,size;
3507   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3508   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3509   Mat            *local,M,Mreuse;
3510   MatScalar      *vwork,*aa;
3511   MPI_Comm       comm = ((PetscObject)mat)->comm;
3512   Mat_SeqAIJ     *aij;
3513 
3514 
3515   PetscFunctionBegin;
3516   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3518 
3519   if (call ==  MAT_REUSE_MATRIX) {
3520     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3521     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3522     local = &Mreuse;
3523     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3524   } else {
3525     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3526     Mreuse = *local;
3527     ierr   = PetscFree(local);CHKERRQ(ierr);
3528   }
3529 
3530   /*
3531       m - number of local rows
3532       n - number of columns (same on all processors)
3533       rstart - first row in new global matrix generated
3534   */
3535   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3536   if (call == MAT_INITIAL_MATRIX) {
3537     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3538     ii  = aij->i;
3539     jj  = aij->j;
3540 
3541     /*
3542         Determine the number of non-zeros in the diagonal and off-diagonal
3543         portions of the matrix in order to do correct preallocation
3544     */
3545 
3546     /* first get start and end of "diagonal" columns */
3547     if (csize == PETSC_DECIDE) {
3548       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3549       if (mglobal == n) { /* square matrix */
3550 	nlocal = m;
3551       } else {
3552         nlocal = n/size + ((n % size) > rank);
3553       }
3554     } else {
3555       nlocal = csize;
3556     }
3557     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3558     rstart = rend - nlocal;
3559     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3560 
3561     /* next, compute all the lengths */
3562     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3563     olens = dlens + m;
3564     for (i=0; i<m; i++) {
3565       jend = ii[i+1] - ii[i];
3566       olen = 0;
3567       dlen = 0;
3568       for (j=0; j<jend; j++) {
3569         if (*jj < rstart || *jj >= rend) olen++;
3570         else dlen++;
3571         jj++;
3572       }
3573       olens[i] = olen;
3574       dlens[i] = dlen;
3575     }
3576     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3577     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3578     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3579     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3580     ierr = PetscFree(dlens);CHKERRQ(ierr);
3581   } else {
3582     PetscInt ml,nl;
3583 
3584     M = *newmat;
3585     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3586     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3587     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3588     /*
3589          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3590        rather than the slower MatSetValues().
3591     */
3592     M->was_assembled = PETSC_TRUE;
3593     M->assembled     = PETSC_FALSE;
3594   }
3595   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3596   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3597   ii  = aij->i;
3598   jj  = aij->j;
3599   aa  = aij->a;
3600   for (i=0; i<m; i++) {
3601     row   = rstart + i;
3602     nz    = ii[i+1] - ii[i];
3603     cwork = jj;     jj += nz;
3604     vwork = aa;     aa += nz;
3605     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3606   }
3607 
3608   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3609   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   *newmat = M;
3611 
3612   /* save submatrix used in processor for next request */
3613   if (call ==  MAT_INITIAL_MATRIX) {
3614     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3615     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3616   }
3617 
3618   PetscFunctionReturn(0);
3619 }
3620 
3621 EXTERN_C_BEGIN
3622 #undef __FUNCT__
3623 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3624 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3625 {
3626   PetscInt       m,cstart, cend,j,nnz,i,d;
3627   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3628   const PetscInt *JJ;
3629   PetscScalar    *values;
3630   PetscErrorCode ierr;
3631 
3632   PetscFunctionBegin;
3633   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3634 
3635   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3636   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3637   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3638   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3639   m      = B->rmap->n;
3640   cstart = B->cmap->rstart;
3641   cend   = B->cmap->rend;
3642   rstart = B->rmap->rstart;
3643 
3644   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3645 
3646 #if defined(PETSC_USE_DEBUG)
3647   for (i=0; i<m; i++) {
3648     nnz     = Ii[i+1]- Ii[i];
3649     JJ      = J + Ii[i];
3650     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3651     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3652     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3653   }
3654 #endif
3655 
3656   for (i=0; i<m; i++) {
3657     nnz     = Ii[i+1]- Ii[i];
3658     JJ      = J + Ii[i];
3659     nnz_max = PetscMax(nnz_max,nnz);
3660     d       = 0;
3661     for (j=0; j<nnz; j++) {
3662       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3663     }
3664     d_nnz[i] = d;
3665     o_nnz[i] = nnz - d;
3666   }
3667   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3668   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3669 
3670   if (v) values = (PetscScalar*)v;
3671   else {
3672     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3673     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3674   }
3675 
3676   for (i=0; i<m; i++) {
3677     ii   = i + rstart;
3678     nnz  = Ii[i+1]- Ii[i];
3679     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3680   }
3681   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3682   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3683 
3684   if (!v) {
3685     ierr = PetscFree(values);CHKERRQ(ierr);
3686   }
3687   PetscFunctionReturn(0);
3688 }
3689 EXTERN_C_END
3690 
3691 #undef __FUNCT__
3692 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3693 /*@
3694    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3695    (the default parallel PETSc format).
3696 
3697    Collective on MPI_Comm
3698 
3699    Input Parameters:
3700 +  B - the matrix
3701 .  i - the indices into j for the start of each local row (starts with zero)
3702 .  j - the column indices for each local row (starts with zero)
3703 -  v - optional values in the matrix
3704 
3705    Level: developer
3706 
3707    Notes:
3708        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3709      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3710      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3711 
3712        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3713 
3714        The format which is used for the sparse matrix input is equivalent to a
3715     row-major ordering, i.e. for the following matrix, the input data expected is
3716     as shown:
3717 
3718         1 0 0
3719         2 0 3     P0
3720        -------
3721         4 5 6     P1
3722 
3723      Process0 [P0]: rows_owned=[0,1]
3724         i =  {0,1,3}  [size = nrow+1  = 2+1]
3725         j =  {0,0,2}  [size = nz = 3]
3726         v =  {1,2,3}  [size = nz = 3]
3727 
3728      Process1 [P1]: rows_owned=[2]
3729         i =  {0,3}    [size = nrow+1  = 1+1]
3730         j =  {0,1,2}  [size = nz = 3]
3731         v =  {4,5,6}  [size = nz = 3]
3732 
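     A minimal calling sequence built around these arrays (a sketch only; it assumes that comm,
     the local sizes m and n, the global sizes M and N, and the per-process arrays i, j, v shown
     above have already been set up by the caller) could look like
.vb
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
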
3733 .keywords: matrix, aij, compressed row, sparse, parallel
3734 
3735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3736           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3737 @*/
3738 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3739 {
3740   PetscErrorCode ierr;
3741 
3742   PetscFunctionBegin;
3743   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3744   PetscFunctionReturn(0);
3745 }
3746 
3747 #undef __FUNCT__
3748 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3749 /*@C
3750    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3751    (the default parallel PETSc format).  For good matrix assembly performance
3752    the user should preallocate the matrix storage by setting the parameters
3753    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3754    performance can be increased by more than a factor of 50.
3755 
3756    Collective on MPI_Comm
3757 
3758    Input Parameters:
3759 +  A - the matrix
3760 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3761            (same value is used for all local rows)
3762 .  d_nnz - array containing the number of nonzeros in the various rows of the
3763            DIAGONAL portion of the local submatrix (possibly different for each row)
3764            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3765            The size of this array is equal to the number of local rows, i.e 'm'.
3766            You must leave room for the diagonal entry even if it is zero.
3767 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3768            submatrix (same value is used for all local rows).
3769 -  o_nnz - array containing the number of nonzeros in the various rows of the
3770            OFF-DIAGONAL portion of the local submatrix (possibly different for
3771            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3772            structure. The size of this array is equal to the number
3773            of local rows, i.e 'm'.
3774 
3775    If the *_nnz parameter is given then the *_nz parameter is ignored
3776 
3777    The AIJ format (also called the Yale sparse matrix format or
3778    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3779    storage.  The stored row and column indices begin with zero.
3780    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3781 
3782    The parallel matrix is partitioned such that the first m0 rows belong to
3783    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3784    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3785 
3786    The DIAGONAL portion of the local submatrix of a processor can be defined
3787    as the submatrix which is obtained by extraction the part corresponding to
3788    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3789    first row that belongs to the processor, r2 is the last row belonging to
3790    this processor, and c1-c2 is the range of indices of the local part of a
3791    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3792    common case of a square matrix, the row and column ranges are the same and
3793    the DIAGONAL part is also square. The remaining portion of the local
3794    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3795 
3796    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3797 
3798    You can call MatGetInfo() to get information on how effective the preallocation was;
3799    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3800    You can also run with the option -info and look for messages with the string
3801    malloc in them to see if additional memory allocation was needed.
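
   As an illustrative sketch (assuming a matrix A and a MatInfo variable named info), such a
   check could be
.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve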
3802 
3803    Example usage:
3804 
3805    Consider the following 8x8 matrix with 34 non-zero values, that is
3806    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3807    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3808    as follows:
3809 
3810 .vb
3811             1  2  0  |  0  3  0  |  0  4
3812     Proc0   0  5  6  |  7  0  0  |  8  0
3813             9  0 10  | 11  0  0  | 12  0
3814     -------------------------------------
3815            13  0 14  | 15 16 17  |  0  0
3816     Proc1   0 18  0  | 19 20 21  |  0  0
3817             0  0  0  | 22 23  0  | 24  0
3818     -------------------------------------
3819     Proc2  25 26 27  |  0  0 28  | 29  0
3820            30  0  0  | 31 32 33  |  0 34
3821 .ve
3822 
3823    This can be represented as a collection of submatrices as:
3824 
3825 .vb
3826       A B C
3827       D E F
3828       G H I
3829 .ve
3830 
3831    Where the submatrices A,B,C are owned by proc0, D,E,F are
3832    owned by proc1, G,H,I are owned by proc2.
3833 
3834    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3835    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3836    The 'M','N' parameters are 8,8, and have the same values on all procs.
3837 
3838    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3839    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3840    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3841    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3842    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3843    matrix, and [DF] as another SeqAIJ matrix.
3844 
3845    When d_nz, o_nz parameters are specified, d_nz storage elements are
3846    allocated for every row of the local diagonal submatrix, and o_nz
3847    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3848    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3849    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3850    In this case, the values of d_nz,o_nz are:
3851 .vb
3852      proc0 : dnz = 2, o_nz = 2
3853      proc1 : dnz = 3, o_nz = 2
3854      proc2 : dnz = 1, o_nz = 4
3855 .ve
3856    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3857    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3858    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3859    34 values.
3860 
3861    When d_nnz, o_nnz parameters are specified, the storage is specified
3862    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3863    In the above case the values for d_nnz,o_nnz are:
3864 .vb
3865      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3866      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3867      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3868 .ve
3869    Here the space allocated is the sum of all the above values, i.e. 34, and
3870    hence pre-allocation is perfect.
3871 
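   As an illustrative sketch for proc0 of the example above (it assumes the matrix A has already
   been created with MatCreate(), MatSetSizes() and MatSetType(A,MATMPIAIJ)), the d_nnz/o_nnz
   variant of the call would be
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
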
3872    Level: intermediate
3873 
3874 .keywords: matrix, aij, compressed row, sparse, parallel
3875 
3876 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3877           MPIAIJ, MatGetInfo()
3878 @*/
3879 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3880 {
3881   PetscErrorCode ierr;
3882 
3883   PetscFunctionBegin;
3884   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3885   PetscValidType(B,1);
3886   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 #undef __FUNCT__
3891 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3892 /*@
3893      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3894          CSR format the local rows.
3895 
3896    Collective on MPI_Comm
3897 
3898    Input Parameters:
3899 +  comm - MPI communicator
3900 .  m - number of local rows (Cannot be PETSC_DECIDE)
3901 .  n - This value should be the same as the local size used in creating the
3902        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3903        calculated if N is given) For square matrices n is almost always m.
3904 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3905 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3906 .   i - row indices
3907 .   j - column indices
3908 -   a - matrix values
3909 
3910    Output Parameter:
3911 .   mat - the matrix
3912 
3913    Level: intermediate
3914 
3915    Notes:
3916        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3917      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3918      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3919 
3920        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3921 
3922        The format which is used for the sparse matrix input is equivalent to a
3923     row-major ordering, i.e. for the following matrix, the input data expected is
3924     as shown:
3925 
3926         1 0 0
3927         2 0 3     P0
3928        -------
3929         4 5 6     P1
3930 
3931      Process0 [P0]: rows_owned=[0,1]
3932         i =  {0,1,3}  [size = nrow+1  = 2+1]
3933         j =  {0,0,2}  [size = nz = 3]
3934         v =  {1,2,3}  [size = nz = 3]
3935 
3936      Process1 [P1]: rows_owned=[2]
3937         i =  {0,3}    [size = nrow+1  = 1+1]
3938         j =  {0,1,2}  [size = nz = 3]
3939         v =  {4,5,6}  [size = nz = 3]
3940 
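     A minimal calling sequence built around these arrays (a sketch only; m is 2 on P0 and 1 on P1,
     the global sizes of this example are 3 by 3, and i, j, v are the per-process arrays shown
     above) could look like
.vb
     ierr = MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
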
3941 .keywords: matrix, aij, compressed row, sparse, parallel
3942 
3943 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3944           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3945 @*/
3946 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3947 {
3948   PetscErrorCode ierr;
3949 
3950   PetscFunctionBegin;
3951   if (i[0]) {
3952     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3953   }
3954   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3955   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3956   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3957   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3958   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3959   PetscFunctionReturn(0);
3960 }
3961 
3962 #undef __FUNCT__
3963 #define __FUNCT__ "MatCreateMPIAIJ"
3964 /*@C
3965    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3966    (the default parallel PETSc format).  For good matrix assembly performance
3967    the user should preallocate the matrix storage by setting the parameters
3968    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3969    performance can be increased by more than a factor of 50.
3970 
3971    Collective on MPI_Comm
3972 
3973    Input Parameters:
3974 +  comm - MPI communicator
3975 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3976            This value should be the same as the local size used in creating the
3977            y vector for the matrix-vector product y = Ax.
3978 .  n - This value should be the same as the local size used in creating the
3979        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3980        calculated if N is given) For square matrices n is almost always m.
3981 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3982 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3983 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3984            (same value is used for all local rows)
3985 .  d_nnz - array containing the number of nonzeros in the various rows of the
3986            DIAGONAL portion of the local submatrix (possibly different for each row)
3987            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3988            The size of this array is equal to the number of local rows, i.e 'm'.
3989            You must leave room for the diagonal entry even if it is zero.
3990 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3991            submatrix (same value is used for all local rows).
3992 -  o_nnz - array containing the number of nonzeros in the various rows of the
3993            OFF-DIAGONAL portion of the local submatrix (possibly different for
3994            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3995            structure. The size of this array is equal to the number
3996            of local rows, i.e 'm'.
3997 
3998    Output Parameter:
3999 .  A - the matrix
4000 
4001    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4002    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4003    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4004 
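   As an illustrative sketch of that paradigm (A, comm, M, N and the preallocation arguments are
   placeholders for whatever the calling code uses):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
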
4005    Notes:
4006    If the *_nnz parameter is given then the *_nz parameter is ignored
4007 
4008    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4009    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4010    storage requirements for this matrix.
4011 
4012    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4013    processor then it must be used on all processors that share the object for
4014    that argument.
4015 
4016    The user MUST specify either the local or global matrix dimensions
4017    (possibly both).
4018 
4019    The parallel matrix is partitioned across processors such that the
4020    first m0 rows belong to process 0, the next m1 rows belong to
4021    process 1, the next m2 rows belong to process 2, etc., where
4022    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4023    values corresponding to an [m x N] submatrix.
4024 
4025    The columns are logically partitioned with the n0 columns belonging
4026    to the 0th partition, the next n1 columns belonging to the next
4027    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4028 
4029    The DIAGONAL portion of the local submatrix on any given processor
4030    is the submatrix corresponding to the rows and columns m,n
4031    owned by the given processor, i.e., the diagonal matrix on
4032    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4033    etc. The remaining portion of the local submatrix, [m x (N-n)],
4034    constitutes the OFF-DIAGONAL portion. The example below
4035    illustrates this concept.
4036 
4037    For a square global matrix we define each processor's diagonal portion
4038    to be its local rows and the corresponding columns (a square submatrix);
4039    each processor's off-diagonal portion encompasses the remainder of the
4040    local matrix (a rectangular submatrix).
4041 
4042    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4043 
4044    When calling this routine with a single process communicator, a matrix of
4045    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4046    type of communicator, use the construction mechanism:
4047      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4048 
4049    By default, this format uses inodes (identical nodes) when possible.
4050    We search for consecutive rows with the same nonzero structure, thereby
4051    reusing matrix information to achieve increased efficiency.
4052 
4053    Options Database Keys:
4054 +  -mat_no_inode  - Do not use inodes
4055 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4056 -  -mat_aij_oneindex - Internally use indexing starting at 1
4057         rather than 0.  Note that when calling MatSetValues(),
4058         the user still MUST index entries starting at 0!
4059 
4060 
4061    Example usage:
4062 
4063    Consider the following 8x8 matrix with 34 non-zero values that is
4064    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4065    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4066    as follows:
4067 
4068 .vb
4069             1  2  0  |  0  3  0  |  0  4
4070     Proc0   0  5  6  |  7  0  0  |  8  0
4071             9  0 10  | 11  0  0  | 12  0
4072     -------------------------------------
4073            13  0 14  | 15 16 17  |  0  0
4074     Proc1   0 18  0  | 19 20 21  |  0  0
4075             0  0  0  | 22 23  0  | 24  0
4076     -------------------------------------
4077     Proc2  25 26 27  |  0  0 28  | 29  0
4078            30  0  0  | 31 32 33  |  0 34
4079 .ve
4080 
4081    This can be represented as a collection of submatrices as:
4082 
4083 .vb
4084       A B C
4085       D E F
4086       G H I
4087 .ve
4088 
4089    Here the submatrices A,B,C are owned by proc0, D,E,F are
4090    owned by proc1, G,H,I are owned by proc2.
4091 
4092    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4093    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4094    The 'M','N' parameters are 8,8, and have the same values on all procs.
4095 
4096    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4097    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4098    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4099    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4100    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4101    matrix, and [DF] as another SeqAIJ matrix.
4102 
4103    When d_nz, o_nz parameters are specified, d_nz storage elements are
4104    allocated for every row of the local diagonal submatrix, and o_nz
4105    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4106    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4107    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4108    In this case, the values of d_nz,o_nz are:
4109 .vb
4110      proc0 : d_nz = 2, o_nz = 2
4111      proc1 : d_nz = 3, o_nz = 2
4112      proc2 : d_nz = 1, o_nz = 4
4113 .ve
4114    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4115    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4116    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4117    34 values.
4118 
4119    When d_nnz, o_nnz parameters are specified, the storage is specified
4120    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4121    In the above case the values for d_nnz,o_nnz are:
4122 .vb
4123      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4124      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4125      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4126 .ve
4127    Here the space allocated is the sum of all the above values, i.e., 34, and
4128    hence pre-allocation is perfect.
4129 
4130    Level: intermediate
4131 
4132 .keywords: matrix, aij, compressed row, sparse, parallel
4133 
4134 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4135           MPIAIJ, MatCreateMPIAIJWithArrays()
4136 @*/
4137 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4138 {
4139   PetscErrorCode ierr;
4140   PetscMPIInt    size;
4141 
4142   PetscFunctionBegin;
4143   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4144   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4145   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4146   if (size > 1) {
4147     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4148     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4149   } else {
4150     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4151     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4152   }
4153   PetscFunctionReturn(0);
4154 }
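
/*
   Sketch of the MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() construction
   recommended in the man page above (illustrative only: comm, m, n, M, N and the
   preallocation arguments are placeholders to be supplied by the caller):

.vb
      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
      ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
.ve
*/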
4155 
4156 #undef __FUNCT__
4157 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4158 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4159 {
4160   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4161 
4162   PetscFunctionBegin;
4163   *Ad     = a->A;
4164   *Ao     = a->B;
4165   *colmap = a->garray;
4166   PetscFunctionReturn(0);
4167 }
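
/*
   Usage sketch for MatMPIAIJGetSeqAIJ() (illustrative only: A is assumed to be an
   assembled MATMPIAIJ matrix):

.vb
      Mat            Ad,Ao;
      PetscInt       *garray;
      PetscErrorCode ierr;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
.ve

   Ad and Ao are the DIAGONAL and OFF-DIAGONAL SeqAIJ blocks described in the
   MatCreateMPIAIJ() man page, and garray[i] is the global column corresponding to
   local column i of Ao.  They are internal objects of A and must not be destroyed
   by the caller.
*/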
4168 
4169 #undef __FUNCT__
4170 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4171 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4172 {
4173   PetscErrorCode ierr;
4174   PetscInt       i;
4175   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4176 
4177   PetscFunctionBegin;
4178   if (coloring->ctype == IS_COLORING_GLOBAL) {
4179     ISColoringValue *allcolors,*colors;
4180     ISColoring      ocoloring;
4181 
4182     /* set coloring for diagonal portion */
4183     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4184 
4185     /* set coloring for off-diagonal portion */
4186     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4187     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4188     for (i=0; i<a->B->cmap->n; i++) {
4189       colors[i] = allcolors[a->garray[i]];
4190     }
4191     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4192     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4193     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4194     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4195   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4196     ISColoringValue *colors;
4197     PetscInt        *larray;
4198     ISColoring      ocoloring;
4199 
4200     /* set coloring for diagonal portion */
4201     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4202     for (i=0; i<a->A->cmap->n; i++) {
4203       larray[i] = i + A->cmap->rstart;
4204     }
4205     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4206     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4207     for (i=0; i<a->A->cmap->n; i++) {
4208       colors[i] = coloring->colors[larray[i]];
4209     }
4210     ierr = PetscFree(larray);CHKERRQ(ierr);
4211     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4212     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4213     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4214 
4215     /* set coloring for off-diagonal portion */
4216     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4217     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4218     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4219     for (i=0; i<a->B->cmap->n; i++) {
4220       colors[i] = coloring->colors[larray[i]];
4221     }
4222     ierr = PetscFree(larray);CHKERRQ(ierr);
4223     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4224     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4225     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4226   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4227 
4228   PetscFunctionReturn(0);
4229 }
4230 
4231 #if defined(PETSC_HAVE_ADIC)
4232 #undef __FUNCT__
4233 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4234 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4235 {
4236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4237   PetscErrorCode ierr;
4238 
4239   PetscFunctionBegin;
4240   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4241   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4242   PetscFunctionReturn(0);
4243 }
4244 #endif
4245 
4246 #undef __FUNCT__
4247 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4248 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4249 {
4250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4251   PetscErrorCode ierr;
4252 
4253   PetscFunctionBegin;
4254   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4255   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4256   PetscFunctionReturn(0);
4257 }
4258 
4259 #undef __FUNCT__
4260 #define __FUNCT__ "MatMergeSymbolic"
4261 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4262 {
4263   PetscErrorCode ierr;
4264   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4265   PetscInt       *indx;
4266 
4267   PetscFunctionBegin;
4268   /* This routine will ONLY return MPIAIJ type matrix */
4269   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4270   if (n == PETSC_DECIDE){
4271     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4272   }
4273   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4274   rstart -= m;
4275 
4276   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4277   for (i=0;i<m;i++) {
4278     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4279     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4280     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4281   }
4282 
4283   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4284   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4285   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4286   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4287   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4288   PetscFunctionReturn(0);
4289 }
4290 
4291 #undef __FUNCT__
4292 #define __FUNCT__ "MatMergeNumeric"
4293 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4294 {
4295   PetscErrorCode ierr;
4296   PetscInt       m,N,i,rstart,nnz,Ii;
4297   PetscInt       *indx;
4298   PetscScalar    *values;
4299 
4300   PetscFunctionBegin;
4301   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4302   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4303   for (i=0;i<m;i++) {
4304     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4305     Ii    = i + rstart;
4306     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4307     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4308   }
4309   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4310   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4311   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4312   PetscFunctionReturn(0);
4313 }
4314 
4315 #undef __FUNCT__
4316 #define __FUNCT__ "MatMerge"
4317 /*@
4318       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4319                  matrices from each processor
4320 
4321     Collective on MPI_Comm
4322 
4323    Input Parameters:
4324 +    comm - the communicator the parallel matrix will live on
4325 .    inmat - the input sequential matrix on each processor
4326 .    n - number of local columns (or PETSC_DECIDE)
4327 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4328 
4329    Output Parameter:
4330 .    outmat - the parallel matrix generated
4331 
4332     Level: advanced
4333 
4334    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4335 
4336 @*/
4337 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4338 {
4339   PetscErrorCode ierr;
4340 
4341   PetscFunctionBegin;
4342   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4343   if (scall == MAT_INITIAL_MATRIX){
4344     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4345   }
4346   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4347   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4348   PetscFunctionReturn(0);
4349 }
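
/*
   Usage sketch for MatMerge() (illustrative only: seqmat is assumed to be a SeqAIJ
   matrix on each process with the same number of columns everywhere):

.vb
      Mat            parmat;
      PetscErrorCode ierr;

      ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);CHKERRQ(ierr);
.ve

   Note that MatMergeNumeric() destroys the input matrix (see the MatDestroy() call
   above), so seqmat must not be used or destroyed after this call.
*/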
4350 
4351 #undef __FUNCT__
4352 #define __FUNCT__ "MatFileSplit"
4353 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4354 {
4355   PetscErrorCode    ierr;
4356   PetscMPIInt       rank;
4357   PetscInt          m,N,i,rstart,nnz;
4358   size_t            len;
4359   const PetscInt    *indx;
4360   PetscViewer       out;
4361   char              *name;
4362   Mat               B;
4363   const PetscScalar *values;
4364 
4365   PetscFunctionBegin;
4366   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4367   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4368   /* Should this be the type of the diagonal block of A? */
4369   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4370   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4371   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4372   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4373   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4374   for (i=0;i<m;i++) {
4375     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4376     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4377     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4378   }
4379   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4380   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4381 
4382   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4383   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4384   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4385   sprintf(name,"%s.%d",outfile,rank);
4386   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4387   ierr = PetscFree(name);CHKERRQ(ierr);
4388   ierr = MatView(B,out);CHKERRQ(ierr);
4389   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4390   ierr = MatDestroy(&B);CHKERRQ(ierr);
4391   PetscFunctionReturn(0);
4392 }
4393 
4394 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4395 #undef __FUNCT__
4396 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4397 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4398 {
4399   PetscErrorCode       ierr;
4400   Mat_Merge_SeqsToMPI  *merge;
4401   PetscContainer       container;
4402 
4403   PetscFunctionBegin;
4404   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4405   if (container) {
4406     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4407     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4408     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4409     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4410     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4419     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4420     ierr = PetscFree(merge);CHKERRQ(ierr);
4421     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4422   }
4423   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #include <../src/mat/utils/freespace.h>
4428 #include <petscbt.h>
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4432 /*@C
4433       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4434                  matrices from each processor
4435 
4436     Collective on MPI_Comm
4437 
4438    Input Parameters:
4439 +    comm - the communicator the parallel matrix will live on
4440 .    seqmat - the input sequential matrix on each processor
4441 .    m - number of local rows (or PETSC_DECIDE)
4442 .    n - number of local columns (or PETSC_DECIDE)
4443 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4444 
4445    Output Parameter:
4446 .    mpimat - the parallel matrix generated
4447 
4448     Level: advanced
4449 
4450    Notes:
4451      The dimensions of the sequential matrix in each processor MUST be the same.
4452      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4453      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4454 @*/
4455 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4456 {
4457   PetscErrorCode       ierr;
4458   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4459   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4460   PetscMPIInt          size,rank,taga,*len_s;
4461   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4462   PetscInt             proc,m;
4463   PetscInt             **buf_ri,**buf_rj;
4464   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4465   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4466   MPI_Request          *s_waits,*r_waits;
4467   MPI_Status           *status;
4468   MatScalar            *aa=a->a;
4469   MatScalar            **abuf_r,*ba_i;
4470   Mat_Merge_SeqsToMPI  *merge;
4471   PetscContainer       container;
4472 
4473   PetscFunctionBegin;
4474   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4475 
4476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4477   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4478 
4479   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4480   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4481 
4482   bi     = merge->bi;
4483   bj     = merge->bj;
4484   buf_ri = merge->buf_ri;
4485   buf_rj = merge->buf_rj;
4486 
4487   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4488   owners = merge->rowmap->range;
4489   len_s  = merge->len_s;
4490 
4491   /* send and recv matrix values */
4492   /*-----------------------------*/
4493   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4494   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4495 
4496   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4497   for (proc=0,k=0; proc<size; proc++){
4498     if (!len_s[proc]) continue;
4499     i = owners[proc];
4500     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4501     k++;
4502   }
4503 
4504   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4505   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4506   ierr = PetscFree(status);CHKERRQ(ierr);
4507 
4508   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4509   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4510 
4511   /* insert mat values of mpimat */
4512   /*----------------------------*/
4513   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4514   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4515 
4516   for (k=0; k<merge->nrecv; k++){
4517     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4518     nrows = *(buf_ri_k[k]);
4519     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4520     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4521   }
4522 
4523   /* set values of ba */
4524   m = merge->rowmap->n;
4525   for (i=0; i<m; i++) {
4526     arow = owners[rank] + i;
4527     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4528     bnzi = bi[i+1] - bi[i];
4529     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4530 
4531     /* add local non-zero vals of this proc's seqmat into ba */
4532     anzi = ai[arow+1] - ai[arow];
4533     aj   = a->j + ai[arow];
4534     aa   = a->a + ai[arow];
4535     nextaj = 0;
4536     for (j=0; nextaj<anzi; j++){
4537       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4538         ba_i[j] += aa[nextaj++];
4539       }
4540     }
4541 
4542     /* add received vals into ba */
4543     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4544       /* i-th row */
4545       if (i == *nextrow[k]) {
4546         anzi = *(nextai[k]+1) - *nextai[k];
4547         aj   = buf_rj[k] + *(nextai[k]);
4548         aa   = abuf_r[k] + *(nextai[k]);
4549         nextaj = 0;
4550         for (j=0; nextaj<anzi; j++){
4551           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4552             ba_i[j] += aa[nextaj++];
4553           }
4554         }
4555         nextrow[k]++; nextai[k]++;
4556       }
4557     }
4558     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4559   }
4560   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4561   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4562 
4563   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4564   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4565   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4566   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4567   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4568   PetscFunctionReturn(0);
4569 }
4570 
4571 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4572 
4573 #undef __FUNCT__
4574 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4575 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4576 {
4577   PetscErrorCode       ierr;
4578   Mat                  B_mpi;
4579   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4580   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4581   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4582   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4583   PetscInt             len,proc,*dnz,*onz;
4584   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4585   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4586   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4587   MPI_Status           *status;
4588   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4589   PetscBT              lnkbt;
4590   Mat_Merge_SeqsToMPI  *merge;
4591   PetscContainer       container;
4592 
4593   PetscFunctionBegin;
4594   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4595 
4596   /* make sure it is a PETSc comm */
4597   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4600 
4601   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4602   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4603 
4604   /* determine row ownership */
4605   /*---------------------------------------------------------*/
4606   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4608   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4611   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4612   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4613 
4614   m      = merge->rowmap->n;
4615   M      = merge->rowmap->N;
4616   owners = merge->rowmap->range;
4617 
4618   /* determine the number of messages to send, their lengths */
4619   /*---------------------------------------------------------*/
4620   len_s  = merge->len_s;
4621 
4622   len = 0;  /* length of buf_si[] */
4623   merge->nsend = 0;
4624   for (proc=0; proc<size; proc++){
4625     len_si[proc] = 0;
4626     if (proc == rank){
4627       len_s[proc] = 0;
4628     } else {
4629       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4630       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4631     }
4632     if (len_s[proc]) {
4633       merge->nsend++;
4634       nrows = 0;
4635       for (i=owners[proc]; i<owners[proc+1]; i++){
4636         if (ai[i+1] > ai[i]) nrows++;
4637       }
4638       len_si[proc] = 2*(nrows+1);
4639       len += len_si[proc];
4640     }
4641   }
4642 
4643   /* determine the number and length of messages to receive for ij-structure */
4644   /*-------------------------------------------------------------------------*/
4645   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4646   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4647 
4648   /* post the Irecv of j-structure */
4649   /*-------------------------------*/
4650   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4651   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4652 
4653   /* post the Isend of j-structure */
4654   /*--------------------------------*/
4655   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4656 
4657   for (proc=0, k=0; proc<size; proc++){
4658     if (!len_s[proc]) continue;
4659     i = owners[proc];
4660     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4661     k++;
4662   }
4663 
4664   /* receives and sends of j-structure are complete */
4665   /*------------------------------------------------*/
4666   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4667   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4668 
4669   /* send and recv i-structure */
4670   /*---------------------------*/
4671   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4672   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4673 
4674   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4675   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4676   for (proc=0,k=0; proc<size; proc++){
4677     if (!len_s[proc]) continue;
4678     /* form outgoing message for i-structure:
4679          buf_si[0]:                 nrows to be sent
4680                [1:nrows]:           row index (local on the receiving process)
4681                [nrows+1:2*nrows+1]: i-structure index
4682     */
4683     /*-------------------------------------------*/
4684     nrows = len_si[proc]/2 - 1;
4685     buf_si_i    = buf_si + nrows+1;
4686     buf_si[0]   = nrows;
4687     buf_si_i[0] = 0;
4688     nrows = 0;
4689     for (i=owners[proc]; i<owners[proc+1]; i++){
4690       anzi = ai[i+1] - ai[i];
4691       if (anzi) {
4692         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4693         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4694         nrows++;
4695       }
4696     }
4697     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4698     k++;
4699     buf_si += len_si[proc];
4700   }
4701 
4702   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4703   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4704 
4705   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4706   for (i=0; i<merge->nrecv; i++){
4707     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4708   }
4709 
4710   ierr = PetscFree(len_si);CHKERRQ(ierr);
4711   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4712   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4713   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4714   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4715   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4716   ierr = PetscFree(status);CHKERRQ(ierr);
4717 
4718   /* compute a local seq matrix in each processor */
4719   /*----------------------------------------------*/
4720   /* allocate bi array and free space for accumulating nonzero column info */
4721   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4722   bi[0] = 0;
4723 
4724   /* create and initialize a linked list */
4725   nlnk = N+1;
4726   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4727 
4728   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4729   len = 0;
4730   len  = ai[owners[rank+1]] - ai[owners[rank]];
4731   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4732   current_space = free_space;
4733 
4734   /* determine symbolic info for each local row */
4735   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4736 
4737   for (k=0; k<merge->nrecv; k++){
4738     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4739     nrows = *buf_ri_k[k];
4740     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4741     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4742   }
4743 
4744   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4745   len = 0;
4746   for (i=0;i<m;i++) {
4747     bnzi   = 0;
4748     /* add local non-zero cols of this proc's seqmat into lnk */
4749     arow   = owners[rank] + i;
4750     anzi   = ai[arow+1] - ai[arow];
4751     aj     = a->j + ai[arow];
4752     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4753     bnzi += nlnk;
4754     /* add received col data into lnk */
4755     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4756       if (i == *nextrow[k]) { /* i-th row */
4757         anzi = *(nextai[k]+1) - *nextai[k];
4758         aj   = buf_rj[k] + *nextai[k];
4759         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4760         bnzi += nlnk;
4761         nextrow[k]++; nextai[k]++;
4762       }
4763     }
4764     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4765 
4766     /* if free space is not available, make more free space */
4767     if (current_space->local_remaining<bnzi) {
4768       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4769       nspacedouble++;
4770     }
4771     /* copy data into free space, then initialize lnk */
4772     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4773     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4774 
4775     current_space->array           += bnzi;
4776     current_space->local_used      += bnzi;
4777     current_space->local_remaining -= bnzi;
4778 
4779     bi[i+1] = bi[i] + bnzi;
4780   }
4781 
4782   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4783 
4784   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4785   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4786   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4787 
4788   /* create symbolic parallel matrix B_mpi */
4789   /*---------------------------------------*/
4790   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4791   if (n==PETSC_DECIDE) {
4792     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4793   } else {
4794     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4795   }
4796   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4797   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4798   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4799 
4800   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4801   B_mpi->assembled     = PETSC_FALSE;
4802   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4803   merge->bi            = bi;
4804   merge->bj            = bj;
4805   merge->buf_ri        = buf_ri;
4806   merge->buf_rj        = buf_rj;
4807   merge->coi           = PETSC_NULL;
4808   merge->coj           = PETSC_NULL;
4809   merge->owners_co     = PETSC_NULL;
4810 
4811   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4812 
4813   /* attach the supporting struct to B_mpi for reuse */
4814   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4815   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4816   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4817   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4818   *mpimat = B_mpi;
4819 
4820   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4821   PetscFunctionReturn(0);
4822 }
4823 
4824 #undef __FUNCT__
4825 #define __FUNCT__ "MatMerge_SeqsToMPI"
4826 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4827 {
4828   PetscErrorCode   ierr;
4829 
4830   PetscFunctionBegin;
4831   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4832   if (scall == MAT_INITIAL_MATRIX){
4833     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4834   }
4835   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4836   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4837   PetscFunctionReturn(0);
4838 }
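
/*
   Usage sketch for MatMerge_SeqsToMPI() (illustrative only: comm, seqmat, m and n
   are placeholders, and the reuse call assumes the nonzero pattern of seqmat has
   not changed):

.vb
      Mat            mpimat;
      PetscErrorCode ierr;

      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... update the numerical values (but not the pattern) of seqmat ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve

   With MAT_INITIAL_MATRIX both the symbolic and numeric phases are run; with
   MAT_REUSE_MATRIX only MatMerge_SeqsToMPINumeric() is run, reusing the
   communication data stored in the "MatMergeSeqsToMPI" container.
*/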
4839 
4840 #undef __FUNCT__
4841 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4842 /*@
4843      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4844           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4845           with MatGetSize()
4846 
4847     Not Collective
4848 
4849    Input Parameters:
4850 +    A - the matrix
4851 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4852 
4853    Output Parameter:
4854 .    A_loc - the local sequential matrix generated
4855 
4856     Level: developer
4857 
4858 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4859 
4860 @*/
4861 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4862 {
4863   PetscErrorCode  ierr;
4864   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4865   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4866   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4867   MatScalar       *aa=a->a,*ba=b->a,*cam;
4868   PetscScalar     *ca;
4869   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4870   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4871   PetscBool       match;
4872 
4873   PetscFunctionBegin;
4874   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4875   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4876   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4877   if (scall == MAT_INITIAL_MATRIX){
4878     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4879     ci[0] = 0;
4880     for (i=0; i<am; i++){
4881       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4882     }
4883     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4884     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4885     k = 0;
4886     for (i=0; i<am; i++) {
4887       ncols_o = bi[i+1] - bi[i];
4888       ncols_d = ai[i+1] - ai[i];
4889       /* off-diagonal portion of A */
4890       for (jo=0; jo<ncols_o; jo++) {
4891         col = cmap[*bj];
4892         if (col >= cstart) break;
4893         cj[k]   = col; bj++;
4894         ca[k++] = *ba++;
4895       }
4896       /* diagonal portion of A */
4897       for (j=0; j<ncols_d; j++) {
4898         cj[k]   = cstart + *aj++;
4899         ca[k++] = *aa++;
4900       }
4901       /* off-diagonal portion of A */
4902       for (j=jo; j<ncols_o; j++) {
4903         cj[k]   = cmap[*bj++];
4904         ca[k++] = *ba++;
4905       }
4906     }
4907     /* put together the new matrix */
4908     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4909     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4910     /* Since these are PETSc arrays, change flags to free them as necessary. */
4911     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4912     mat->free_a  = PETSC_TRUE;
4913     mat->free_ij = PETSC_TRUE;
4914     mat->nonew   = 0;
4915   } else if (scall == MAT_REUSE_MATRIX){
4916     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4917     ci = mat->i; cj = mat->j; cam = mat->a;
4918     for (i=0; i<am; i++) {
4919       /* off-diagonal portion of A */
4920       ncols_o = bi[i+1] - bi[i];
4921       for (jo=0; jo<ncols_o; jo++) {
4922         col = cmap[*bj];
4923         if (col >= cstart) break;
4924         *cam++ = *ba++; bj++;
4925       }
4926       /* diagonal portion of A */
4927       ncols_d = ai[i+1] - ai[i];
4928       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4929       /* off-diagonal portion of A */
4930       for (j=jo; j<ncols_o; j++) {
4931         *cam++ = *ba++; bj++;
4932       }
4933     }
4934   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4935   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4936   PetscFunctionReturn(0);
4937 }
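
/*
   Usage sketch for MatMPIAIJGetLocalMat() (illustrative only: A is assumed to be an
   assembled MATMPIAIJ matrix whose nonzero pattern does not change between the two
   calls):

.vb
      Mat            A_loc;
      PetscErrorCode ierr;

      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... update the numerical values (but not the pattern) of A ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve

   With MAT_INITIAL_MATRIX a new SeqAIJ matrix is created; with MAT_REUSE_MATRIX only
   the numerical values of the previously created A_loc are refreshed.
*/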
4938 
4939 #undef __FUNCT__
4940 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4941 /*@C
4942      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4943 
4944     Not Collective
4945 
4946    Input Parameters:
4947 +    A - the matrix
4948 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4949 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4950 
4951    Output Parameter:
4952 .    A_loc - the local sequential matrix generated
4953 
4954     Level: developer
4955 
4956 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4957 
4958 @*/
4959 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4960 {
4961   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4962   PetscErrorCode    ierr;
4963   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4964   IS                isrowa,iscola;
4965   Mat               *aloc;
4966   PetscBool       match;
4967 
4968   PetscFunctionBegin;
4969   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4970   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4971   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4972   if (!row){
4973     start = A->rmap->rstart; end = A->rmap->rend;
4974     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4975   } else {
4976     isrowa = *row;
4977   }
4978   if (!col){
4979     start = A->cmap->rstart;
4980     cmap  = a->garray;
4981     nzA   = a->A->cmap->n;
4982     nzB   = a->B->cmap->n;
4983     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4984     ncols = 0;
4985     for (i=0; i<nzB; i++) {
4986       if (cmap[i] < start) idx[ncols++] = cmap[i];
4987       else break;
4988     }
4989     imark = i;
4990     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4991     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4992     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4993   } else {
4994     iscola = *col;
4995   }
4996   if (scall != MAT_INITIAL_MATRIX){
4997     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4998     aloc[0] = *A_loc;
4999   }
5000   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5001   *A_loc = aloc[0];
5002   ierr = PetscFree(aloc);CHKERRQ(ierr);
5003   if (!row){
5004     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5005   }
5006   if (!col){
5007     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5008   }
5009   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5010   PetscFunctionReturn(0);
5011 }
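
/*
   Usage sketch for MatMPIAIJGetLocalMatCondensed() (illustrative only: A is assumed
   to be an assembled MATMPIAIJ matrix).  Passing PETSC_NULL for row and col lets the
   routine build the default index sets, namely all local rows and the locally owned
   columns together with the nonzero off-process columns:

.vb
      Mat            A_loc;
      PetscErrorCode ierr;

      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
*/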
5012 
5013 #undef __FUNCT__
5014 #define __FUNCT__ "MatGetBrowsOfAcols"
5015 /*@C
5016     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5017 
5018     Collective on Mat
5019 
5020    Input Parameters:
5021 +    A,B - the matrices in mpiaij format
5022 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5023 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5024 
5025    Output Parameter:
5026 +    rowb, colb - index sets of rows and columns of B to extract
5027 -    B_seq - the sequential matrix generated
5028 
5029     Level: developer
5030 
5031 @*/
5032 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5033 {
5034   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5035   PetscErrorCode    ierr;
5036   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5037   IS                isrowb,iscolb;
5038   Mat               *bseq=PETSC_NULL;
5039 
5040   PetscFunctionBegin;
5041   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5042     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5043   }
5044   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5045 
5046   if (scall == MAT_INITIAL_MATRIX){
5047     start = A->cmap->rstart;
5048     cmap  = a->garray;
5049     nzA   = a->A->cmap->n;
5050     nzB   = a->B->cmap->n;
5051     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5052     ncols = 0;
5053     for (i=0; i<nzB; i++) {  /* row < local row index */
5054       if (cmap[i] < start) idx[ncols++] = cmap[i];
5055       else break;
5056     }
5057     imark = i;
5058     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5059     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5060     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5061     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5062   } else {
5063     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5064     isrowb = *rowb; iscolb = *colb;
5065     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5066     bseq[0] = *B_seq;
5067   }
5068   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5069   *B_seq = bseq[0];
5070   ierr = PetscFree(bseq);CHKERRQ(ierr);
5071   if (!rowb){
5072     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5073   } else {
5074     *rowb = isrowb;
5075   }
5076   if (!colb){
5077     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5078   } else {
5079     *colb = iscolb;
5080   }
5081   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5082   PetscFunctionReturn(0);
5083 }
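
/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only: A and B are assumed to be
   MATMPIAIJ matrices; the column layout of A must match the row layout of B, as
   checked above):

.vb
      IS             rowb,colb;
      Mat            B_seq;
      PetscErrorCode ierr;

      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... update the numerical values (but not the pattern) of B ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);

      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve

   On the MAT_INITIAL_MATRIX call rowb and colb are created and returned so they can
   be passed back on a MAT_REUSE_MATRIX call, where they are required (see the error
   check above).
*/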
5084 
5085 #undef __FUNCT__
5086 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5087 /*
5088     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5089     of the OFF-DIAGONAL portion of local A
5090 
5091     Collective on Mat
5092 
5093    Input Parameters:
5094 +    A,B - the matrices in mpiaij format
5095 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5096 
5097    Output Parameter:
5098 +    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5099 .    startsj_r - similar to startsj for receives
5100 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5101 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5102 
5103     Level: developer
5104 
5105 */
5106 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5107 {
5108   VecScatter_MPI_General *gen_to,*gen_from;
5109   PetscErrorCode         ierr;
5110   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5111   Mat_SeqAIJ             *b_oth;
5112   VecScatter             ctx=a->Mvctx;
5113   MPI_Comm               comm=((PetscObject)ctx)->comm;
5114   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5115   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5116   PetscScalar            *rvalues,*svalues;
5117   MatScalar              *b_otha,*bufa,*bufA;
5118   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5119   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5120   MPI_Status             *sstatus,rstatus;
5121   PetscMPIInt            jj;
5122   PetscInt               *cols,sbs,rbs;
5123   PetscScalar            *vals;
5124 
5125   PetscFunctionBegin;
5126   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5127     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5128   }
5129   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5130   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5131 
5132   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5133   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5134   rvalues  = gen_from->values; /* holds the length of receiving row */
5135   svalues  = gen_to->values;   /* holds the length of sending row */
5136   nrecvs   = gen_from->n;
5137   nsends   = gen_to->n;
5138 
5139   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5140   srow     = gen_to->indices;   /* local row index to be sent */
5141   sstarts  = gen_to->starts;
5142   sprocs   = gen_to->procs;
5143   sstatus  = gen_to->sstatus;
5144   sbs      = gen_to->bs;
5145   rstarts  = gen_from->starts;
5146   rprocs   = gen_from->procs;
5147   rbs      = gen_from->bs;
5148 
5149   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5150   if (scall == MAT_INITIAL_MATRIX){
5151     /* i-array */
5152     /*---------*/
5153     /*  post receives */
5154     for (i=0; i<nrecvs; i++){
5155       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5156       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5157       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5158     }
5159 
5160     /* pack the outgoing message */
5161     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5162     sstartsj[0] = 0;  rstartsj[0] = 0;
5163     len = 0; /* total length of j or a array to be sent */
5164     k = 0;
5165     for (i=0; i<nsends; i++){
5166       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5167       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5168       for (j=0; j<nrows; j++) {
5169         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5170         for (l=0; l<sbs; l++){
5171           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5172           rowlen[j*sbs+l] = ncols;
5173           len += ncols;
5174           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5175         }
5176         k++;
5177       }
5178       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5179       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5180     }
5181     /* recvs and sends of i-array are completed */
5182     i = nrecvs;
5183     while (i--) {
5184       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5185     }
5186     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5187 
5188     /* allocate buffers for sending j and a arrays */
5189     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5190     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5191 
5192     /* create i-array of B_oth */
5193     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5194     b_othi[0] = 0;
5195     len = 0; /* total length of j or a array to be received */
5196     k = 0;
5197     for (i=0; i<nrecvs; i++){
5198       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5199       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5200       for (j=0; j<nrows; j++) {
5201         b_othi[k+1] = b_othi[k] + rowlen[j];
5202         len += rowlen[j]; k++;
5203       }
5204       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5205     }
5206 
5207     /* allocate space for j and a arrays of B_oth */
5208     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5209     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5210 
5211     /* j-array */
5212     /*---------*/
5213     /*  post receives of j-array */
5214     for (i=0; i<nrecvs; i++){
5215       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5216       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5217     }
5218 
5219     /* pack the outgoing message j-array */
5220     k = 0;
5221     for (i=0; i<nsends; i++){
5222       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5223       bufJ = bufj+sstartsj[i];
5224       for (j=0; j<nrows; j++) {
5225         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5226         for (ll=0; ll<sbs; ll++){
5227           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5228           for (l=0; l<ncols; l++){
5229             *bufJ++ = cols[l];
5230           }
5231           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5232         }
5233       }
5234       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5235     }
5236 
5237     /* recvs and sends of j-array are completed */
5238     i = nrecvs;
5239     while (i--) {
5240       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5241     }
5242     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5243   } else if (scall == MAT_REUSE_MATRIX){
5244     sstartsj = *startsj;
5245     rstartsj = *startsj_r;
5246     bufa     = *bufa_ptr;
5247     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5248     b_otha   = b_oth->a;
5249   } else {
5250     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5251   }
5252 
5253   /* a-array */
5254   /*---------*/
5255   /*  post receives of a-array */
5256   for (i=0; i<nrecvs; i++){
5257     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5258     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5259   }
5260 
5261   /* pack the outgoing message a-array */
5262   k = 0;
5263   for (i=0; i<nsends; i++){
5264     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5265     bufA = bufa+sstartsj[i];
5266     for (j=0; j<nrows; j++) {
5267       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5268       for (ll=0; ll<sbs; ll++){
5269         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5270         for (l=0; l<ncols; l++){
5271           *bufA++ = vals[l];
5272         }
5273         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5274       }
5275     }
5276     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5277   }
5278   /* recvs and sends of a-array are completed */
5279   i = nrecvs;
5280   while (i--) {
5281     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5282   }
5283   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5284   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5285 
5286   if (scall == MAT_INITIAL_MATRIX){
5287     /* put together the new matrix */
5288     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5289 
5290     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5291     /* Since these are PETSc arrays, change flags to free them as necessary. */
5292     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5293     b_oth->free_a  = PETSC_TRUE;
5294     b_oth->free_ij = PETSC_TRUE;
5295     b_oth->nonew   = 0;
5296 
5297     ierr = PetscFree(bufj);CHKERRQ(ierr);
5298     if (!startsj || !bufa_ptr){
5299       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5300       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5301     } else {
5302       *startsj   = sstartsj;
5303       *startsj_r = rstartsj;
5304       *bufa_ptr  = bufa;
5305     }
5306   }
5307   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5308   PetscFunctionReturn(0);
5309 }
5310 
5311 #undef __FUNCT__
5312 #define __FUNCT__ "MatGetCommunicationStructs"
5313 /*@C
5314   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5315 
5316   Not Collective
5317 
5318   Input Parameters:
5319 . A - The matrix in mpiaij format
5320 
5321   Output Parameter:
5322 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5323 . colmap - A map from global column index to local index into lvec
5324 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5325 
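  Notes:
  A minimal usage sketch is shown below (assuming A is an assembled MATMPIAIJ matrix; the variable
  names lvec, cmap, and scatter are placeholders chosen here):
.vb
   Vec        lvec;
   VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
   PetscTable cmap;
#else
   PetscInt   *cmap;
#endif
   ierr = MatGetCommunicationStructs(A,&lvec,&cmap,&scatter);CHKERRQ(ierr);
.ve
  The returned objects are references to the matrix's internal data structures and should not be
  destroyed or freed by the caller.
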
5326   Level: developer
5327 
5328 @*/
5329 #if defined (PETSC_USE_CTABLE)
5330 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5331 #else
5332 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5333 #endif
5334 {
5335   Mat_MPIAIJ *a;
5336 
5337   PetscFunctionBegin;
5338   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5339   PetscValidPointer(lvec, 2);
5340   PetscValidPointer(colmap, 3);
5341   PetscValidPointer(multScatter, 4);
5342   a = (Mat_MPIAIJ *) A->data;
5343   if (lvec) *lvec = a->lvec;
5344   if (colmap) *colmap = a->colmap;
5345   if (multScatter) *multScatter = a->Mvctx;
5346   PetscFunctionReturn(0);
5347 }
5348 
5349 EXTERN_C_BEGIN
5350 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5351 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5352 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5353 EXTERN_C_END
5354 
5355 #undef __FUNCT__
5356 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5357 /*
5358     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5359 
5360                n                       p                          p
5361         (              )       (              )         (                  )
5362       m (      A       )  *  n (       B      )   =   m (         C        )
5363         (              )       (              )         (                  )
5364 
5365 */
5366 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5367 {
5368   PetscErrorCode     ierr;
5369   Mat                At,Bt,Ct;
5370 
5371   PetscFunctionBegin;
5372   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5373   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5374   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5375   ierr = MatDestroy(&At);CHKERRQ(ierr);
5376   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5377   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5378   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5379   PetscFunctionReturn(0);
5380 }
5381 
5382 #undef __FUNCT__
5383 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5384 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5385 {
5386   PetscErrorCode ierr;
5387   PetscInt       m=A->rmap->n,n=B->cmap->n;
5388   Mat            Cmat;
5389 
5390   PetscFunctionBegin;
5391   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5392   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5393   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5394   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5395   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5396   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5397   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5398   *C   = Cmat;
5399   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5400   PetscFunctionReturn(0);
5401 }
5402 
5403 /* ----------------------------------------------------------------*/
5404 #undef __FUNCT__
5405 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5406 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5407 {
5408   PetscErrorCode ierr;
5409 
5410   PetscFunctionBegin;
5411   if (scall == MAT_INITIAL_MATRIX){
5412     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5413   }
5414   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5415   PetscFunctionReturn(0);
5416 }
5417 
5418 EXTERN_C_BEGIN
5419 #if defined(PETSC_HAVE_MUMPS)
5420 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5421 #endif
5422 #if defined(PETSC_HAVE_PASTIX)
5423 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5424 #endif
5425 #if defined(PETSC_HAVE_SUPERLU_DIST)
5426 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5427 #endif
5428 #if defined(PETSC_HAVE_SPOOLES)
5429 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5430 #endif
5431 EXTERN_C_END
5432 
5433 /*MC
5434    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5435 
5436    Options Database Keys:
5437 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5438 
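   Notes:
   A typical creation sequence is sketched below; the sizes m, n, M, N and the preallocation
   arguments d_nz, d_nnz, o_nz, o_nnz are placeholders supplied by the caller:
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve
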
5439   Level: beginner
5440 
5441 .seealso: MatCreateMPIAIJ()
5442 M*/
5443 
5444 EXTERN_C_BEGIN
5445 #undef __FUNCT__
5446 #define __FUNCT__ "MatCreate_MPIAIJ"
5447 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5448 {
5449   Mat_MPIAIJ     *b;
5450   PetscErrorCode ierr;
5451   PetscMPIInt    size;
5452 
5453   PetscFunctionBegin;
5454   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5455 
5456   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5457   B->data         = (void*)b;
5458   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5459   B->rmap->bs     = 1;
5460   B->assembled    = PETSC_FALSE;
5461 
5462   B->insertmode   = NOT_SET_VALUES;
5463   b->size         = size;
5464   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5465 
5466   /* build cache for off array entries formed */
5467   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5468   b->donotstash  = PETSC_FALSE;
5469   b->colmap      = 0;
5470   b->garray      = 0;
5471   b->roworiented = PETSC_TRUE;
5472 
5473   /* stuff used for matrix vector multiply */
5474   b->lvec      = PETSC_NULL;
5475   b->Mvctx     = PETSC_NULL;
5476 
5477   /* stuff for MatGetRow() */
5478   b->rowindices   = 0;
5479   b->rowvalues    = 0;
5480   b->getrowactive = PETSC_FALSE;
5481 
5482 #if defined(PETSC_HAVE_SPOOLES)
5483   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5484                                      "MatGetFactor_mpiaij_spooles",
5485                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5486 #endif
5487 #if defined(PETSC_HAVE_MUMPS)
5488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5489                                      "MatGetFactor_aij_mumps",
5490                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5491 #endif
5492 #if defined(PETSC_HAVE_PASTIX)
5493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5494 					   "MatGetFactor_mpiaij_pastix",
5495 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5496 #endif
5497 #if defined(PETSC_HAVE_SUPERLU_DIST)
5498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5499                                      "MatGetFactor_mpiaij_superlu_dist",
5500                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5501 #endif
5502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5503                                      "MatStoreValues_MPIAIJ",
5504                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5506                                      "MatRetrieveValues_MPIAIJ",
5507                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5509 				     "MatGetDiagonalBlock_MPIAIJ",
5510                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5512 				     "MatIsTranspose_MPIAIJ",
5513 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5515 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5516 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5518 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5519 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5521 				     "MatDiagonalScaleLocal_MPIAIJ",
5522 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5524                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5525                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5527                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5528                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5530                                      "MatConvert_MPIAIJ_MPISBAIJ",
5531                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5533                                      "MatMatMult_MPIDense_MPIAIJ",
5534                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5536                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5537                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5539                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5540                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5542   PetscFunctionReturn(0);
5543 }
5544 EXTERN_C_END
5545 
5546 #undef __FUNCT__
5547 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5548 /*@
5549      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5550          and "off-diagonal" part of the matrix in CSR format.
5551 
5552    Collective on MPI_Comm
5553 
5554    Input Parameters:
5555 +  comm - MPI communicator
5556 .  m - number of local rows (Cannot be PETSC_DECIDE)
5557 .  n - number of local columns; this should be the same as the local size used in creating the
5558        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5559        calculated if N is given). For square matrices n is almost always m.
5560 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5561 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5562 .   i - row indices for "diagonal" portion of matrix
5563 .   j - column indices for "diagonal" portion, given as local indices into this process's diagonal block
5564 .   a - matrix values for "diagonal" portion
5565 .   oi - row indices for "off-diagonal" portion of matrix
5566 .   oj - column indices for "off-diagonal" portion, given as global column indices
5567 -   oa - matrix values for "off-diagonal" portion
5568 
5569    Output Parameter:
5570 .   mat - the matrix
5571 
5572    Level: advanced
5573 
5574    Notes:
5575        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5576        remains responsible for freeing these arrays, and must not do so until the matrix has been destroyed.
5577 
5578        The i, j, oi, and oj indices are 0 based
5579 
5580        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5581 
5582        This sets local rows and cannot be used to set off-processor values.
5583 
5584        You cannot later use MatSetValues() to change values in this matrix.
5585 
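       Example usage:
       A sketch of the call made by the first of two processes, each owning two rows and two columns;
       the numerical values are illustrative only, j holds local column indices into the diagonal block,
       oj holds global column indices, and in real code the arrays are heap allocated so that they
       outlive the matrix:
.vb
   PetscInt    i[]  = {0,1,2};     /* diagonal block: one nonzero per local row */
   PetscInt    j[]  = {0,1};       /* local column indices */
   PetscScalar a[]  = {1.0,2.0};
   PetscInt    oi[] = {0,0,1};     /* off-diagonal block: one nonzero, in the second row */
   PetscInt    oj[] = {3};         /* global column index owned by the other process */
   PetscScalar oa[] = {5.0};
   Mat         A;
   PetscErrorCode ierr;

   ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                         i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
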
5586 .keywords: matrix, aij, compressed row, sparse, parallel
5587 
5588 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5589           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5590 @*/
5591 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5592 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5593 {
5594   PetscErrorCode ierr;
5595   Mat_MPIAIJ     *maij;
5596 
5597   PetscFunctionBegin;
5598   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5599   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5600   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5601   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5602   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5603   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5604   maij = (Mat_MPIAIJ*) (*mat)->data;
5605   maij->donotstash     = PETSC_TRUE;
5606   (*mat)->preallocated = PETSC_TRUE;
5607 
5608   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5609   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5610   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5611   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5612 
5613   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5614   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5615 
5616   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5617   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5618   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5619   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5620 
5621   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5622   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5623   PetscFunctionReturn(0);
5624 }
5625 
5626 /*
5627     Special version for direct calls from Fortran
5628 */
5629 #include <private/fortranimpl.h>
5630 
5631 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5632 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5633 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5634 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5635 #endif
5636 
5637 /* Change these macros so they can be used in a void function */
5638 #undef CHKERRQ
5639 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5640 #undef SETERRQ2
5641 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5642 #undef SETERRQ
5643 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5644 
5645 EXTERN_C_BEGIN
5646 #undef __FUNCT__
5647 #define __FUNCT__ "matsetvaluesmpiaij_"
5648 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5649 {
5650   Mat             mat = *mmat;
5651   PetscInt        m = *mm, n = *mn;
5652   InsertMode      addv = *maddv;
5653   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5654   PetscScalar     value;
5655   PetscErrorCode  ierr;
5656 
5657   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5658   if (mat->insertmode == NOT_SET_VALUES) {
5659     mat->insertmode = addv;
5660   }
5661 #if defined(PETSC_USE_DEBUG)
5662   else if (mat->insertmode != addv) {
5663     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5664   }
5665 #endif
5666   {
5667   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5668   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5669   PetscBool       roworiented = aij->roworiented;
5670 
5671   /* Some Variables required in the macro */
5672   Mat             A = aij->A;
5673   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5674   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5675   MatScalar       *aa = a->a;
5676   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5677   Mat             B = aij->B;
5678   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5679   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5680   MatScalar       *ba = b->a;
5681 
5682   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5683   PetscInt        nonew = a->nonew;
5684   MatScalar       *ap1,*ap2;
5685 
5686   PetscFunctionBegin;
5687   for (i=0; i<m; i++) {
5688     if (im[i] < 0) continue;
5689 #if defined(PETSC_USE_DEBUG)
5690     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5691 #endif
5692     if (im[i] >= rstart && im[i] < rend) {
5693       row      = im[i] - rstart;
5694       lastcol1 = -1;
5695       rp1      = aj + ai[row];
5696       ap1      = aa + ai[row];
5697       rmax1    = aimax[row];
5698       nrow1    = ailen[row];
5699       low1     = 0;
5700       high1    = nrow1;
5701       lastcol2 = -1;
5702       rp2      = bj + bi[row];
5703       ap2      = ba + bi[row];
5704       rmax2    = bimax[row];
5705       nrow2    = bilen[row];
5706       low2     = 0;
5707       high2    = nrow2;
5708 
5709       for (j=0; j<n; j++) {
5710         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5711         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5712         if (in[j] >= cstart && in[j] < cend){
5713           col = in[j] - cstart;
5714           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5715         } else if (in[j] < 0) continue;
5716 #if defined(PETSC_USE_DEBUG)
5717         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5718 #endif
5719         else {
5720           if (mat->was_assembled) {
5721             if (!aij->colmap) {
5722               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5723             }
5724 #if defined (PETSC_USE_CTABLE)
5725             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5726 	    col--;
5727 #else
5728             col = aij->colmap[in[j]] - 1;
5729 #endif
5730             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5731               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5732               col =  in[j];
5733               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5734               B = aij->B;
5735               b = (Mat_SeqAIJ*)B->data;
5736               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5737               rp2      = bj + bi[row];
5738               ap2      = ba + bi[row];
5739               rmax2    = bimax[row];
5740               nrow2    = bilen[row];
5741               low2     = 0;
5742               high2    = nrow2;
5743               bm       = aij->B->rmap->n;
5744               ba = b->a;
5745             }
5746           } else col = in[j];
5747           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5748         }
5749       }
5750     } else {
5751       if (!aij->donotstash) {
5752         if (roworiented) {
5753           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5754         } else {
5755           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5756         }
5757       }
5758     }
5759   }}
5760   PetscFunctionReturnVoid();
5761 }
5762 EXTERN_C_END
5763 
5764