xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0716a85f2ec13166f862a3128e4c2e598c442603)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
70 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
71 {
72   PetscErrorCode ierr;
73   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
74   PetscInt       i,n,*garray = aij->garray;
75   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
76   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
77   PetscReal      *work;
78 
79   PetscFunctionBegin;
80   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
81   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
82   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
83   if (type == NORM_2) {
84     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
85       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
86     }
87     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
88       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
89     }
90   } else if (type == NORM_1) {
91     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
92       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
93     }
94     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
95       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
96     }
97   } else if (type == NORM_INFINITY) {
98     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
99       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
100     }
101     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
102       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
103     }
104 
105   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
106   if (type == NORM_INFINITY) {
107     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
108   } else {
109     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
110   }
111   ierr = PetscFree(work);CHKERRQ(ierr);
112   if (type == NORM_2) {
113     for (i=0; i<n; i++) norms[i] = sqrt(norms[i]);
114   }
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatDistribute_MPIAIJ"
120 /*
121     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
122     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
123 
124     Only for square matrices
125 */
126 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
127 {
128   PetscMPIInt    rank,size;
129   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
130   PetscErrorCode ierr;
131   Mat            mat;
132   Mat_SeqAIJ     *gmata;
133   PetscMPIInt    tag;
134   MPI_Status     status;
135   PetscBool      aij;
136   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
137 
138   PetscFunctionBegin;
139   CHKMEMQ;
140   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
141   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
142   if (!rank) {
143     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
144     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
145   }
146   if (reuse == MAT_INITIAL_MATRIX) {
147     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
148     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
149     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
150     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
151     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
152     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
153     rowners[0] = 0;
154     for (i=2; i<=size; i++) {
155       rowners[i] += rowners[i-1];
156     }
157     rstart = rowners[rank];
158     rend   = rowners[rank+1];
159     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
160     if (!rank) {
161       gmata = (Mat_SeqAIJ*) gmat->data;
162       /* send row lengths to all processors */
163       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
164       for (i=1; i<size; i++) {
165 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
166       }
167       /* determine number diagonal and off-diagonal counts */
168       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
169       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
170       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
171       jj = 0;
172       for (i=0; i<m; i++) {
173 	for (j=0; j<dlens[i]; j++) {
174           if (gmata->j[jj] < rstart) ld[i]++;
175 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
176 	  jj++;
177 	}
178       }
179       /* send column indices to other processes */
180       for (i=1; i<size; i++) {
181 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
182 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
183 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
184       }
185 
186       /* send numerical values to other processes */
187       for (i=1; i<size; i++) {
188         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
189         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
190       }
191       gmataa = gmata->a;
192       gmataj = gmata->j;
193 
194     } else {
195       /* receive row lengths */
196       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
197       /* receive column indices */
198       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
199       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
200       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
201       /* determine number diagonal and off-diagonal counts */
202       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
203       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
204       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
205       jj = 0;
206       for (i=0; i<m; i++) {
207 	for (j=0; j<dlens[i]; j++) {
208           if (gmataj[jj] < rstart) ld[i]++;
209 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
210 	  jj++;
211 	}
212       }
213       /* receive numerical values */
214       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
215       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
216     }
217     /* set preallocation */
218     for (i=0; i<m; i++) {
219       dlens[i] -= olens[i];
220     }
221     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
222     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
223 
224     for (i=0; i<m; i++) {
225       dlens[i] += olens[i];
226     }
227     cnt  = 0;
228     for (i=0; i<m; i++) {
229       row  = rstart + i;
230       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
231       cnt += dlens[i];
232     }
233     if (rank) {
234       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
235     }
236     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
237     ierr = PetscFree(rowners);CHKERRQ(ierr);
238     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
239     *inmat = mat;
240   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
241     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
242     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
243     mat   = *inmat;
244     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
245     if (!rank) {
246       /* send numerical values to other processes */
247       gmata = (Mat_SeqAIJ*) gmat->data;
248       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
249       gmataa = gmata->a;
250       for (i=1; i<size; i++) {
251         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
252         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
253       }
254       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
255     } else {
256       /* receive numerical values from process 0*/
257       nz   = Ad->nz + Ao->nz;
258       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
259       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
260     }
261     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
262     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
263     ad = Ad->a;
264     ao = Ao->a;
265     if (mat->rmap->n) {
266       i  = 0;
267       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
268       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
269     }
270     for (i=1; i<mat->rmap->n; i++) {
271       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
272       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
273     }
274     i--;
275     if (mat->rmap->n) {
276       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
277     }
278     if (rank) {
279       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
280     }
281   }
282   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
283   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
284   CHKMEMQ;
285   PetscFunctionReturn(0);
286 }
287 
288 /*
289   Local utility routine that creates a mapping from the global column
290 number to the local number in the off-diagonal part of the local
291 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
292 a slightly higher hash table cost; without it it is not scalable (each processor
293 has an order N integer array but is fast to acess.
294 */
295 #undef __FUNCT__
296 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
297 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
298 {
299   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
300   PetscErrorCode ierr;
301   PetscInt       n = aij->B->cmap->n,i;
302 
303   PetscFunctionBegin;
304 #if defined (PETSC_USE_CTABLE)
305   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
306   for (i=0; i<n; i++){
307     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
308   }
309 #else
310   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
311   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
312   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
313   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
314 #endif
315   PetscFunctionReturn(0);
316 }
317 
318 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
319 { \
320     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
321     lastcol1 = col;\
322     while (high1-low1 > 5) { \
323       t = (low1+high1)/2; \
324       if (rp1[t] > col) high1 = t; \
325       else             low1  = t; \
326     } \
327       for (_i=low1; _i<high1; _i++) { \
328         if (rp1[_i] > col) break; \
329         if (rp1[_i] == col) { \
330           if (addv == ADD_VALUES) ap1[_i] += value;   \
331           else                    ap1[_i] = value; \
332           goto a_noinsert; \
333         } \
334       }  \
335       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
336       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
337       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
338       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
339       N = nrow1++ - 1; a->nz++; high1++; \
340       /* shift up all the later entries in this row */ \
341       for (ii=N; ii>=_i; ii--) { \
342         rp1[ii+1] = rp1[ii]; \
343         ap1[ii+1] = ap1[ii]; \
344       } \
345       rp1[_i] = col;  \
346       ap1[_i] = value;  \
347       a_noinsert: ; \
348       ailen[row] = nrow1; \
349 }
350 
351 
352 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
353 { \
354     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
355     lastcol2 = col;\
356     while (high2-low2 > 5) { \
357       t = (low2+high2)/2; \
358       if (rp2[t] > col) high2 = t; \
359       else             low2  = t; \
360     } \
361     for (_i=low2; _i<high2; _i++) {		\
362       if (rp2[_i] > col) break;			\
363       if (rp2[_i] == col) {			      \
364 	if (addv == ADD_VALUES) ap2[_i] += value;     \
365 	else                    ap2[_i] = value;      \
366 	goto b_noinsert;			      \
367       }						      \
368     }							      \
369     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
370     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
371     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
372     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
373     N = nrow2++ - 1; b->nz++; high2++;					\
374     /* shift up all the later entries in this row */			\
375     for (ii=N; ii>=_i; ii--) {						\
376       rp2[ii+1] = rp2[ii];						\
377       ap2[ii+1] = ap2[ii];						\
378     }									\
379     rp2[_i] = col;							\
380     ap2[_i] = value;							\
381     b_noinsert: ;								\
382     bilen[row] = nrow2;							\
383 }
384 
385 #undef __FUNCT__
386 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
387 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
388 {
389   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
390   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
391   PetscErrorCode ierr;
392   PetscInt       l,*garray = mat->garray,diag;
393 
394   PetscFunctionBegin;
395   /* code only works for square matrices A */
396 
397   /* find size of row to the left of the diagonal part */
398   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
399   row  = row - diag;
400   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
401     if (garray[b->j[b->i[row]+l]] > diag) break;
402   }
403   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
404 
405   /* diagonal part */
406   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
407 
408   /* right of diagonal part */
409   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatSetValues_MPIAIJ"
415 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
416 {
417   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
418   PetscScalar    value;
419   PetscErrorCode ierr;
420   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
421   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
422   PetscBool      roworiented = aij->roworiented;
423 
424   /* Some Variables required in the macro */
425   Mat            A = aij->A;
426   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
427   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
428   MatScalar      *aa = a->a;
429   PetscBool      ignorezeroentries = a->ignorezeroentries;
430   Mat            B = aij->B;
431   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
432   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
433   MatScalar      *ba = b->a;
434 
435   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
436   PetscInt       nonew = a->nonew;
437   MatScalar      *ap1,*ap2;
438 
439   PetscFunctionBegin;
440   if (v) PetscValidScalarPointer(v,6);
441   for (i=0; i<m; i++) {
442     if (im[i] < 0) continue;
443 #if defined(PETSC_USE_DEBUG)
444     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
445 #endif
446     if (im[i] >= rstart && im[i] < rend) {
447       row      = im[i] - rstart;
448       lastcol1 = -1;
449       rp1      = aj + ai[row];
450       ap1      = aa + ai[row];
451       rmax1    = aimax[row];
452       nrow1    = ailen[row];
453       low1     = 0;
454       high1    = nrow1;
455       lastcol2 = -1;
456       rp2      = bj + bi[row];
457       ap2      = ba + bi[row];
458       rmax2    = bimax[row];
459       nrow2    = bilen[row];
460       low2     = 0;
461       high2    = nrow2;
462 
463       for (j=0; j<n; j++) {
464         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
465         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
466         if (in[j] >= cstart && in[j] < cend){
467           col = in[j] - cstart;
468           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
469         } else if (in[j] < 0) continue;
470 #if defined(PETSC_USE_DEBUG)
471         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
472 #endif
473         else {
474           if (mat->was_assembled) {
475             if (!aij->colmap) {
476               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
477             }
478 #if defined (PETSC_USE_CTABLE)
479             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
480 	    col--;
481 #else
482             col = aij->colmap[in[j]] - 1;
483 #endif
484             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
485               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
486               col =  in[j];
487               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488               B = aij->B;
489               b = (Mat_SeqAIJ*)B->data;
490               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491               rp2      = bj + bi[row];
492               ap2      = ba + bi[row];
493               rmax2    = bimax[row];
494               nrow2    = bilen[row];
495               low2     = 0;
496               high2    = nrow2;
497               bm       = aij->B->rmap->n;
498               ba = b->a;
499             }
500           } else col = in[j];
501           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
502         }
503       }
504     } else {
505       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
506       if (!aij->donotstash) {
507         if (roworiented) {
508           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
509         } else {
510           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
511         }
512       }
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "MatGetValues_MPIAIJ"
520 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
521 {
522   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
523   PetscErrorCode ierr;
524   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
525   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
526 
527   PetscFunctionBegin;
528   for (i=0; i<m; i++) {
529     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
530     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
531     if (idxm[i] >= rstart && idxm[i] < rend) {
532       row = idxm[i] - rstart;
533       for (j=0; j<n; j++) {
534         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
535         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
536         if (idxn[j] >= cstart && idxn[j] < cend){
537           col = idxn[j] - cstart;
538           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
539         } else {
540           if (!aij->colmap) {
541             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
542           }
543 #if defined (PETSC_USE_CTABLE)
544           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
545           col --;
546 #else
547           col = aij->colmap[idxn[j]] - 1;
548 #endif
549           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
550           else {
551             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
552           }
553         }
554       }
555     } else {
556       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
557     }
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
566 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
567 {
568   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
569   PetscErrorCode ierr;
570   PetscInt       nstash,reallocs;
571   InsertMode     addv;
572 
573   PetscFunctionBegin;
574   if (aij->donotstash || mat->nooffprocentries) {
575     PetscFunctionReturn(0);
576   }
577 
578   /* make sure all processors are either in INSERTMODE or ADDMODE */
579   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
580   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
581   mat->insertmode = addv; /* in case this processor had no cache */
582 
583   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
584   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
585   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
591 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
592 {
593   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
594   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
595   PetscErrorCode ierr;
596   PetscMPIInt    n;
597   PetscInt       i,j,rstart,ncols,flg;
598   PetscInt       *row,*col;
599   PetscBool      other_disassembled;
600   PetscScalar    *val;
601   InsertMode     addv = mat->insertmode;
602 
603   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
604   PetscFunctionBegin;
605   if (!aij->donotstash && !mat->nooffprocentries) {
606     while (1) {
607       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
608       if (!flg) break;
609 
610       for (i=0; i<n;) {
611         /* Now identify the consecutive vals belonging to the same row */
612         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
613         if (j < n) ncols = j-i;
614         else       ncols = n-i;
615         /* Now assemble all these values with a single function call */
616         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
617         i = j;
618       }
619     }
620     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
621   }
622   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
623   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
624 
625   /* determine if any processor has disassembled, if so we must
626      also disassemble ourselfs, in order that we may reassemble. */
627   /*
628      if nonzero structure of submatrix B cannot change then we know that
629      no processor disassembled thus we can skip this stuff
630   */
631   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
632     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
633     if (mat->was_assembled && !other_disassembled) {
634       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
635     }
636   }
637   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
638     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
639   }
640   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
641   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
642   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
643   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
644 
645   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
646   aij->rowvalues = 0;
647 
648   /* used by MatAXPY() */
649   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
650   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
651 
652   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
653   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
659 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
660 {
661   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
662   PetscErrorCode ierr;
663 
664   PetscFunctionBegin;
665   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
666   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "MatZeroRows_MPIAIJ"
672 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
673 {
674   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
675   PetscErrorCode    ierr;
676   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
677   PetscInt          i,*owners = A->rmap->range;
678   PetscInt          *nprocs,j,idx,nsends,row;
679   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
680   PetscInt          *rvalues,count,base,slen,*source;
681   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
682   MPI_Comm          comm = ((PetscObject)A)->comm;
683   MPI_Request       *send_waits,*recv_waits;
684   MPI_Status        recv_status,*send_status;
685   const PetscScalar *xx;
686   PetscScalar       *bb;
687 #if defined(PETSC_DEBUG)
688   PetscBool      found = PETSC_FALSE;
689 #endif
690 
691   PetscFunctionBegin;
692   /*  first count number of contributors to each processor */
693   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
694   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
695   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
696   j = 0;
697   for (i=0; i<N; i++) {
698     if (lastidx > (idx = rows[i])) j = 0;
699     lastidx = idx;
700     for (; j<size; j++) {
701       if (idx >= owners[j] && idx < owners[j+1]) {
702         nprocs[2*j]++;
703         nprocs[2*j+1] = 1;
704         owner[i] = j;
705 #if defined(PETSC_DEBUG)
706         found = PETSC_TRUE;
707 #endif
708         break;
709       }
710     }
711 #if defined(PETSC_DEBUG)
712     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
713     found = PETSC_FALSE;
714 #endif
715   }
716   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
717 
718   if (A->nooffproczerorows) {
719     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
720     nrecvs = nsends;
721     nmax   = N;
722   } else {
723     /* inform other processors of number of messages and max length*/
724     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
725   }
726 
727   /* post receives:   */
728   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
729   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
730   for (i=0; i<nrecvs; i++) {
731     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
732   }
733 
734   /* do sends:
735       1) starts[i] gives the starting index in svalues for stuff going to
736          the ith processor
737   */
738   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
739   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
740   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
741   starts[0] = 0;
742   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
743   for (i=0; i<N; i++) {
744     svalues[starts[owner[i]]++] = rows[i];
745   }
746 
747   starts[0] = 0;
748   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
749   count = 0;
750   for (i=0; i<size; i++) {
751     if (nprocs[2*i+1]) {
752       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
753     }
754   }
755   ierr = PetscFree(starts);CHKERRQ(ierr);
756 
757   base = owners[rank];
758 
759   /*  wait on receives */
760   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
761   count  = nrecvs; slen = 0;
762   while (count) {
763     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
764     /* unpack receives into our local space */
765     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
766     source[imdex]  = recv_status.MPI_SOURCE;
767     lens[imdex]    = n;
768     slen          += n;
769     count--;
770   }
771   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
772 
773   /* move the data into the send scatter */
774   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
775   count = 0;
776   for (i=0; i<nrecvs; i++) {
777     values = rvalues + i*nmax;
778     for (j=0; j<lens[i]; j++) {
779       lrows[count++] = values[j] - base;
780     }
781   }
782   ierr = PetscFree(rvalues);CHKERRQ(ierr);
783   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
784   ierr = PetscFree(owner);CHKERRQ(ierr);
785   ierr = PetscFree(nprocs);CHKERRQ(ierr);
786 
787   /* fix right hand side if needed */
788   if (x && b) {
789     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
790     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
791     for (i=0; i<slen; i++) {
792       bb[lrows[i]] = diag*xx[lrows[i]];
793     }
794     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
795     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
796   }
797   /*
798         Zero the required rows. If the "diagonal block" of the matrix
799      is square and the user wishes to set the diagonal we use separate
800      code so that MatSetValues() is not called for each diagonal allocating
801      new memory, thus calling lots of mallocs and slowing things down.
802 
803   */
804   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
805   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
806   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
807     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
808   } else if (diag != 0.0) {
809     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
810     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
811       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
812 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
813     }
814     for (i = 0; i < slen; i++) {
815       row  = lrows[i] + rstart;
816       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
817     }
818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
820   } else {
821     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
822   }
823   ierr = PetscFree(lrows);CHKERRQ(ierr);
824 
825   /* wait on sends */
826   if (nsends) {
827     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
828     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
829     ierr = PetscFree(send_status);CHKERRQ(ierr);
830   }
831   ierr = PetscFree(send_waits);CHKERRQ(ierr);
832   ierr = PetscFree(svalues);CHKERRQ(ierr);
833 
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
840 {
841   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
842   PetscErrorCode    ierr;
843   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
844   PetscInt          i,*owners = A->rmap->range;
845   PetscInt          *nprocs,j,idx,nsends;
846   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
847   PetscInt          *rvalues,count,base,slen,*source;
848   PetscInt          *lens,*lrows,*values,m;
849   MPI_Comm          comm = ((PetscObject)A)->comm;
850   MPI_Request       *send_waits,*recv_waits;
851   MPI_Status        recv_status,*send_status;
852   const PetscScalar *xx;
853   PetscScalar       *bb,*mask;
854   Vec               xmask,lmask;
855   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
856   const PetscInt    *aj, *ii,*ridx;
857   PetscScalar       *aa;
858 #if defined(PETSC_DEBUG)
859   PetscBool         found = PETSC_FALSE;
860 #endif
861 
862   PetscFunctionBegin;
863   /*  first count number of contributors to each processor */
864   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
865   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
866   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
867   j = 0;
868   for (i=0; i<N; i++) {
869     if (lastidx > (idx = rows[i])) j = 0;
870     lastidx = idx;
871     for (; j<size; j++) {
872       if (idx >= owners[j] && idx < owners[j+1]) {
873         nprocs[2*j]++;
874         nprocs[2*j+1] = 1;
875         owner[i] = j;
876 #if defined(PETSC_DEBUG)
877         found = PETSC_TRUE;
878 #endif
879         break;
880       }
881     }
882 #if defined(PETSC_DEBUG)
883     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
884     found = PETSC_FALSE;
885 #endif
886   }
887   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
888 
889   /* inform other processors of number of messages and max length*/
890   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
891 
892   /* post receives:   */
893   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
894   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
895   for (i=0; i<nrecvs; i++) {
896     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
897   }
898 
899   /* do sends:
900       1) starts[i] gives the starting index in svalues for stuff going to
901          the ith processor
902   */
903   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
904   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
905   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
906   starts[0] = 0;
907   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
908   for (i=0; i<N; i++) {
909     svalues[starts[owner[i]]++] = rows[i];
910   }
911 
912   starts[0] = 0;
913   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
914   count = 0;
915   for (i=0; i<size; i++) {
916     if (nprocs[2*i+1]) {
917       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
918     }
919   }
920   ierr = PetscFree(starts);CHKERRQ(ierr);
921 
922   base = owners[rank];
923 
924   /*  wait on receives */
925   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
926   count  = nrecvs; slen = 0;
927   while (count) {
928     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
929     /* unpack receives into our local space */
930     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
931     source[imdex]  = recv_status.MPI_SOURCE;
932     lens[imdex]    = n;
933     slen          += n;
934     count--;
935   }
936   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
937 
938   /* move the data into the send scatter */
939   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
940   count = 0;
941   for (i=0; i<nrecvs; i++) {
942     values = rvalues + i*nmax;
943     for (j=0; j<lens[i]; j++) {
944       lrows[count++] = values[j] - base;
945     }
946   }
947   ierr = PetscFree(rvalues);CHKERRQ(ierr);
948   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
949   ierr = PetscFree(owner);CHKERRQ(ierr);
950   ierr = PetscFree(nprocs);CHKERRQ(ierr);
951   /* lrows are the local rows to be zeroed, slen is the number of local rows */
952 
953   /* zero diagonal part of matrix */
954   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
955 
956   /* handle off diagonal part of matrix */
957   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
958   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
959   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
960   for (i=0; i<slen; i++) {
961     bb[lrows[i]] = 1;
962   }
963   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
964   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
967   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
970   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
972 
973   /* remove zeroed rows of off diagonal matrix */
974   ii = aij->i;
975   for (i=0; i<slen; i++) {
976     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
977   }
978 
979   /* loop over all elements of off process part of matrix zeroing removed columns*/
980   if (aij->compressedrow.use){
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i=0; i<m; i++){
985       n   = ii[i+1] - ii[i];
986       aj  = aij->j + ii[i];
987       aa  = aij->a + ii[i];
988 
989       for (j=0; j<n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           bb[*ridx] -= *aa*xx[*aj];
992           *aa        = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001    for (i=0; i<m; i++) {
1002      n   = ii[i+1] - ii[i];
1003      aj  = aij->j + ii[i];
1004      aa  = aij->a + ii[i];
1005      for (j=0; j<n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           bb[i] -= *aa*xx[*aj];
1008           *aa    = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015 
1016   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1017   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1018   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1019   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1020   ierr = PetscFree(lrows);CHKERRQ(ierr);
1021 
1022   /* wait on sends */
1023   if (nsends) {
1024     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1025     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1026     ierr = PetscFree(send_status);CHKERRQ(ierr);
1027   }
1028   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1029   ierr = PetscFree(svalues);CHKERRQ(ierr);
1030 
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatMult_MPIAIJ"
1036 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1037 {
1038   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       nt;
1041 
1042   PetscFunctionBegin;
1043   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1044   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1045   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1047   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1054 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1066 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1067 {
1068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1073   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1074   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1081 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1082 {
1083   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1084   PetscErrorCode ierr;
1085   PetscBool      merged;
1086 
1087   PetscFunctionBegin;
1088   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1089   /* do nondiagonal part */
1090   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1091   if (!merged) {
1092     /* send it on its way */
1093     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1094     /* do local part */
1095     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1096     /* receive remote parts: note this assumes the values are not actually */
1097     /* added in yy until the next line, */
1098     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1099   } else {
1100     /* do local part */
1101     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1102     /* send it on its way */
1103     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1104     /* values actually were received in the Begin() but we need to call this nop */
1105     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1106   }
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 EXTERN_C_BEGIN
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1113 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1114 {
1115   MPI_Comm       comm;
1116   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1117   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1118   IS             Me,Notme;
1119   PetscErrorCode ierr;
1120   PetscInt       M,N,first,last,*notme,i;
1121   PetscMPIInt    size;
1122 
1123   PetscFunctionBegin;
1124 
1125   /* Easy test: symmetric diagonal block */
1126   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1127   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1128   if (!*f) PetscFunctionReturn(0);
1129   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1131   if (size == 1) PetscFunctionReturn(0);
1132 
1133   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1134   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1135   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1136   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1137   for (i=0; i<first; i++) notme[i] = i;
1138   for (i=last; i<M; i++) notme[i-last+first] = i;
1139   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1140   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1141   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1142   Aoff = Aoffs[0];
1143   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1144   Boff = Boffs[0];
1145   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1146   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1147   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1148   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1149   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1150   ierr = PetscFree(notme);CHKERRQ(ierr);
1151    PetscFunctionReturn(0);
1152 }
1153 EXTERN_C_END
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1157 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1158 {
1159   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   /* do nondiagonal part */
1164   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1165   /* send it on its way */
1166   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1167   /* do local part */
1168   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1169   /* receive remote parts */
1170   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 /*
1175   This only works correctly for square matrices where the subblock A->A is the
1176    diagonal block
1177 */
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1180 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1181 {
1182   PetscErrorCode ierr;
1183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1184 
1185   PetscFunctionBegin;
1186   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1187   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1188   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatScale_MPIAIJ"
1194 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1195 {
1196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1201   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatDestroy_MPIAIJ"
1207 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1208 {
1209   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213 #if defined(PETSC_USE_LOG)
1214   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1215 #endif
1216   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1217   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1218   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1219   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1220 #if defined (PETSC_USE_CTABLE)
1221   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1222 #else
1223   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1224 #endif
1225   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1226   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1227   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1228   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1229   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1230   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1231 
1232   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1246 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1247 {
1248   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1249   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1250   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1251   PetscErrorCode    ierr;
1252   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1253   int               fd;
1254   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1255   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1256   PetscScalar       *column_values;
1257   PetscInt          message_count,flowcontrolcount;
1258 
1259   PetscFunctionBegin;
1260   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1261   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1262   nz   = A->nz + B->nz;
1263   if (!rank) {
1264     header[0] = MAT_FILE_CLASSID;
1265     header[1] = mat->rmap->N;
1266     header[2] = mat->cmap->N;
1267     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1268     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1269     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     /* get largest number of rows any processor has */
1271     rlen = mat->rmap->n;
1272     range = mat->rmap->range;
1273     for (i=1; i<size; i++) {
1274       rlen = PetscMax(rlen,range[i+1] - range[i]);
1275     }
1276   } else {
1277     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1278     rlen = mat->rmap->n;
1279   }
1280 
1281   /* load up the local row counts */
1282   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1283   for (i=0; i<mat->rmap->n; i++) {
1284     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1285   }
1286 
1287   /* store the row lengths to the file */
1288   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1289   if (!rank) {
1290     MPI_Status status;
1291     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1292     for (i=1; i<size; i++) {
1293       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1294       rlen = range[i+1] - range[i];
1295       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1296       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1297     }
1298     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1299   } else {
1300     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1301     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1305 
1306   /* load up the local column indices */
1307   nzmax = nz; /* )th processor needs space a largest processor needs */
1308   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1309   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1310   cnt  = 0;
1311   for (i=0; i<mat->rmap->n; i++) {
1312     for (j=B->i[i]; j<B->i[i+1]; j++) {
1313       if ( (col = garray[B->j[j]]) > cstart) break;
1314       column_indices[cnt++] = col;
1315     }
1316     for (k=A->i[i]; k<A->i[i+1]; k++) {
1317       column_indices[cnt++] = A->j[k] + cstart;
1318     }
1319     for (; j<B->i[i+1]; j++) {
1320       column_indices[cnt++] = garray[B->j[j]];
1321     }
1322   }
1323   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1324 
1325   /* store the column indices to the file */
1326    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1327   if (!rank) {
1328     MPI_Status status;
1329     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1333       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1334       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1335       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     }
1337      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1338   } else {
1339     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1340     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1341     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1342     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1343   }
1344   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1345 
1346   /* load up the local column values */
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( garray[B->j[j]] > cstart) break;
1352       column_values[cnt++] = B->a[j];
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_values[cnt++] = A->a[k];
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_values[cnt++] = B->a[j];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column values to the file */
1364    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1372       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_values);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1388 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1389 {
1390   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1391   PetscErrorCode    ierr;
1392   PetscMPIInt       rank = aij->rank,size = aij->size;
1393   PetscBool         isdraw,iascii,isbinary;
1394   PetscViewer       sviewer;
1395   PetscViewerFormat format;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1400   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1401   if (iascii) {
1402     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1403     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1404       MatInfo    info;
1405       PetscBool  inodes;
1406 
1407       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1408       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1409       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1410       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1411       if (!inodes) {
1412         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1413 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1414       } else {
1415         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1416 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1417       }
1418       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1422       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1423       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1424       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1425       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1426       PetscFunctionReturn(0);
1427     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1428       PetscInt   inodecount,inodelimit,*inodes;
1429       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1430       if (inodes) {
1431         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1432       } else {
1433         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1434       }
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1437       PetscFunctionReturn(0);
1438     }
1439   } else if (isbinary) {
1440     if (size == 1) {
1441       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1442       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1443     } else {
1444       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1445     }
1446     PetscFunctionReturn(0);
1447   } else if (isdraw) {
1448     PetscDraw  draw;
1449     PetscBool  isnull;
1450     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1451     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1452   }
1453 
1454   if (size == 1) {
1455     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1456     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1457   } else {
1458     /* assemble the entire matrix onto first processor. */
1459     Mat         A;
1460     Mat_SeqAIJ  *Aloc;
1461     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1462     MatScalar   *a;
1463 
1464     if (mat->rmap->N > 1024) {
1465       PetscBool  flg = PETSC_FALSE;
1466 
1467       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1468       if (!flg) {
1469         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1470       }
1471     }
1472 
1473     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1474     if (!rank) {
1475       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1476     } else {
1477       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1478     }
1479     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1480     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1481     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1482     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1483 
1484     /* copy over the A part */
1485     Aloc = (Mat_SeqAIJ*)aij->A->data;
1486     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1487     row = mat->rmap->rstart;
1488     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1489     for (i=0; i<m; i++) {
1490       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1491       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1492     }
1493     aj = Aloc->j;
1494     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1495 
1496     /* copy over the B part */
1497     Aloc = (Mat_SeqAIJ*)aij->B->data;
1498     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1499     row  = mat->rmap->rstart;
1500     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1501     ct   = cols;
1502     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1503     for (i=0; i<m; i++) {
1504       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1505       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1506     }
1507     ierr = PetscFree(ct);CHKERRQ(ierr);
1508     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1509     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510     /*
1511        Everyone has to call to draw the matrix since the graphics waits are
1512        synchronized across all processors that share the PetscDraw object
1513     */
1514     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1515     if (!rank) {
1516       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1517       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1518       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1519       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1520     }
1521     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1522     ierr = MatDestroy(&A);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatView_MPIAIJ"
1529 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1530 {
1531   PetscErrorCode ierr;
1532   PetscBool      iascii,isdraw,issocket,isbinary;
1533 
1534   PetscFunctionBegin;
1535   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1536   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1537   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1538   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1539   if (iascii || isdraw || isbinary || issocket) {
1540     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1541   } else {
1542     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1543   }
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatSOR_MPIAIJ"
1549 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1550 {
1551   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1552   PetscErrorCode ierr;
1553   Vec            bb1 = 0;
1554   PetscBool      hasop;
1555 
1556   PetscFunctionBegin;
1557   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1558     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1559   }
1560 
1561   if (flag == SOR_APPLY_UPPER) {
1562     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1563     PetscFunctionReturn(0);
1564   }
1565 
1566   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571 
1572     while (its--) {
1573       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1575 
1576       /* update rhs: bb1 = bb - B*x */
1577       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1578       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1579 
1580       /* local sweep */
1581       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1582     }
1583   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1584     if (flag & SOR_ZERO_INITIAL_GUESS) {
1585       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1586       its--;
1587     }
1588     while (its--) {
1589       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591 
1592       /* update rhs: bb1 = bb - B*x */
1593       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1594       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1595 
1596       /* local sweep */
1597       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1598     }
1599   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1600     if (flag & SOR_ZERO_INITIAL_GUESS) {
1601       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602       its--;
1603     }
1604     while (its--) {
1605       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607 
1608       /* update rhs: bb1 = bb - B*x */
1609       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1610       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1611 
1612       /* local sweep */
1613       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1614     }
1615   }  else if (flag & SOR_EISENSTAT) {
1616     Vec         xx1;
1617 
1618     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1619     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1620 
1621     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1622     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623     if (!mat->diag) {
1624       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1625       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1626     }
1627     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1628     if (hasop) {
1629       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1630     } else {
1631       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1632     }
1633     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1634 
1635     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1636 
1637     /* local sweep */
1638     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1639     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1640     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1641   } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1642 
1643   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatPermute_MPIAIJ"
1649 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1650 {
1651   MPI_Comm       comm,pcomm;
1652   PetscInt       first,local_size,nrows;
1653   const PetscInt *rows;
1654   PetscMPIInt    size;
1655   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1656   PetscErrorCode ierr;
1657 
1658   PetscFunctionBegin;
1659   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1660   /* make a collective version of 'rowp' */
1661   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1662   if (pcomm==comm) {
1663     crowp = rowp;
1664   } else {
1665     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1666     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1667     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1668     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1669   }
1670   /* collect the global row permutation and invert it */
1671   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1672   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1673   if (pcomm!=comm) {
1674     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1675   }
1676   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1677   /* get the local target indices */
1678   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1679   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1680   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1681   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1682   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1683   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1684   /* the column permutation is so much easier;
1685      make a local version of 'colp' and invert it */
1686   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1687   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1688   if (size==1) {
1689     lcolp = colp;
1690   } else {
1691     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1692     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1693     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1694   }
1695   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1696   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1697   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1698   if (size>1) {
1699     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1700     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1701   }
1702   /* now we just get the submatrix */
1703   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1704   /* clean up */
1705   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1706   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1712 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1713 {
1714   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1715   Mat            A = mat->A,B = mat->B;
1716   PetscErrorCode ierr;
1717   PetscReal      isend[5],irecv[5];
1718 
1719   PetscFunctionBegin;
1720   info->block_size     = 1.0;
1721   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1722   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1723   isend[3] = info->memory;  isend[4] = info->mallocs;
1724   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1725   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1726   isend[3] += info->memory;  isend[4] += info->mallocs;
1727   if (flag == MAT_LOCAL) {
1728     info->nz_used      = isend[0];
1729     info->nz_allocated = isend[1];
1730     info->nz_unneeded  = isend[2];
1731     info->memory       = isend[3];
1732     info->mallocs      = isend[4];
1733   } else if (flag == MAT_GLOBAL_MAX) {
1734     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1735     info->nz_used      = irecv[0];
1736     info->nz_allocated = irecv[1];
1737     info->nz_unneeded  = irecv[2];
1738     info->memory       = irecv[3];
1739     info->mallocs      = irecv[4];
1740   } else if (flag == MAT_GLOBAL_SUM) {
1741     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1742     info->nz_used      = irecv[0];
1743     info->nz_allocated = irecv[1];
1744     info->nz_unneeded  = irecv[2];
1745     info->memory       = irecv[3];
1746     info->mallocs      = irecv[4];
1747   }
1748   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1749   info->fill_ratio_needed = 0;
1750   info->factor_mallocs    = 0;
1751 
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSetOption_MPIAIJ"
1757 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1758 {
1759   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1760   PetscErrorCode ierr;
1761 
1762   PetscFunctionBegin;
1763   switch (op) {
1764   case MAT_NEW_NONZERO_LOCATIONS:
1765   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1766   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1767   case MAT_KEEP_NONZERO_PATTERN:
1768   case MAT_NEW_NONZERO_LOCATION_ERR:
1769   case MAT_USE_INODES:
1770   case MAT_IGNORE_ZERO_ENTRIES:
1771     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1772     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1773     break;
1774   case MAT_ROW_ORIENTED:
1775     a->roworiented = flg;
1776     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1777     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1778     break;
1779   case MAT_NEW_DIAGONALS:
1780     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1781     break;
1782   case MAT_IGNORE_OFF_PROC_ENTRIES:
1783     a->donotstash = PETSC_TRUE;
1784     break;
1785   case MAT_SPD:
1786     A->spd_set                         = PETSC_TRUE;
1787     A->spd                             = flg;
1788     if (flg) {
1789       A->symmetric                     = PETSC_TRUE;
1790       A->structurally_symmetric        = PETSC_TRUE;
1791       A->symmetric_set                 = PETSC_TRUE;
1792       A->structurally_symmetric_set    = PETSC_TRUE;
1793     }
1794     break;
1795   case MAT_SYMMETRIC:
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     break;
1798   case MAT_STRUCTURALLY_SYMMETRIC:
1799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_HERMITIAN:
1802     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1803     break;
1804   case MAT_SYMMETRY_ETERNAL:
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     break;
1807   default:
1808     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatGetRow_MPIAIJ"
1815 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1816 {
1817   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1818   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1819   PetscErrorCode ierr;
1820   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1821   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1822   PetscInt       *cmap,*idx_p;
1823 
1824   PetscFunctionBegin;
1825   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1826   mat->getrowactive = PETSC_TRUE;
1827 
1828   if (!mat->rowvalues && (idx || v)) {
1829     /*
1830         allocate enough space to hold information from the longest row.
1831     */
1832     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1833     PetscInt   max = 1,tmp;
1834     for (i=0; i<matin->rmap->n; i++) {
1835       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1836       if (max < tmp) { max = tmp; }
1837     }
1838     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1839   }
1840 
1841   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1842   lrow = row - rstart;
1843 
1844   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1845   if (!v)   {pvA = 0; pvB = 0;}
1846   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1847   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1848   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1849   nztot = nzA + nzB;
1850 
1851   cmap  = mat->garray;
1852   if (v  || idx) {
1853     if (nztot) {
1854       /* Sort by increasing column numbers, assuming A and B already sorted */
1855       PetscInt imark = -1;
1856       if (v) {
1857         *v = v_p = mat->rowvalues;
1858         for (i=0; i<nzB; i++) {
1859           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1860           else break;
1861         }
1862         imark = i;
1863         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1864         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1865       }
1866       if (idx) {
1867         *idx = idx_p = mat->rowindices;
1868         if (imark > -1) {
1869           for (i=0; i<imark; i++) {
1870             idx_p[i] = cmap[cworkB[i]];
1871           }
1872         } else {
1873           for (i=0; i<nzB; i++) {
1874             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1875             else break;
1876           }
1877           imark = i;
1878         }
1879         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1880         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1881       }
1882     } else {
1883       if (idx) *idx = 0;
1884       if (v)   *v   = 0;
1885     }
1886   }
1887   *nz = nztot;
1888   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1889   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1895 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1896 {
1897   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1898 
1899   PetscFunctionBegin;
1900   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1901   aij->getrowactive = PETSC_FALSE;
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatNorm_MPIAIJ"
1907 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1908 {
1909   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1910   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1911   PetscErrorCode ierr;
1912   PetscInt       i,j,cstart = mat->cmap->rstart;
1913   PetscReal      sum = 0.0;
1914   MatScalar      *v;
1915 
1916   PetscFunctionBegin;
1917   if (aij->size == 1) {
1918     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1919   } else {
1920     if (type == NORM_FROBENIUS) {
1921       v = amat->a;
1922       for (i=0; i<amat->nz; i++) {
1923 #if defined(PETSC_USE_COMPLEX)
1924         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1925 #else
1926         sum += (*v)*(*v); v++;
1927 #endif
1928       }
1929       v = bmat->a;
1930       for (i=0; i<bmat->nz; i++) {
1931 #if defined(PETSC_USE_COMPLEX)
1932         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1933 #else
1934         sum += (*v)*(*v); v++;
1935 #endif
1936       }
1937       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1938       *norm = sqrt(*norm);
1939     } else if (type == NORM_1) { /* max column norm */
1940       PetscReal *tmp,*tmp2;
1941       PetscInt  *jj,*garray = aij->garray;
1942       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1943       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1944       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1945       *norm = 0.0;
1946       v = amat->a; jj = amat->j;
1947       for (j=0; j<amat->nz; j++) {
1948         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1949       }
1950       v = bmat->a; jj = bmat->j;
1951       for (j=0; j<bmat->nz; j++) {
1952         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1953       }
1954       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1955       for (j=0; j<mat->cmap->N; j++) {
1956         if (tmp2[j] > *norm) *norm = tmp2[j];
1957       }
1958       ierr = PetscFree(tmp);CHKERRQ(ierr);
1959       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1960     } else if (type == NORM_INFINITY) { /* max row norm */
1961       PetscReal ntemp = 0.0;
1962       for (j=0; j<aij->A->rmap->n; j++) {
1963         v = amat->a + amat->i[j];
1964         sum = 0.0;
1965         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1966           sum += PetscAbsScalar(*v); v++;
1967         }
1968         v = bmat->a + bmat->i[j];
1969         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1970           sum += PetscAbsScalar(*v); v++;
1971         }
1972         if (sum > ntemp) ntemp = sum;
1973       }
1974       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1975     } else {
1976       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1977     }
1978   }
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatTranspose_MPIAIJ"
1984 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1985 {
1986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1987   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1988   PetscErrorCode ierr;
1989   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1990   PetscInt       cstart=A->cmap->rstart,ncol;
1991   Mat            B;
1992   MatScalar      *array;
1993 
1994   PetscFunctionBegin;
1995   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1996 
1997   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1998   ai = Aloc->i; aj = Aloc->j;
1999   bi = Bloc->i; bj = Bloc->j;
2000   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2001     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2002     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2003     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2004     for (i=0; i<ai[ma]; i++){
2005       d_nnz[aj[i]] ++;
2006       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2007     }
2008 
2009     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2010     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2011     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2012     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2013     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2014   } else {
2015     B = *matout;
2016   }
2017 
2018   /* copy over the A part */
2019   array = Aloc->a;
2020   row = A->rmap->rstart;
2021   for (i=0; i<ma; i++) {
2022     ncol = ai[i+1]-ai[i];
2023     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2024     row++; array += ncol; aj += ncol;
2025   }
2026   aj = Aloc->j;
2027   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2028 
2029   /* copy over the B part */
2030   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2031   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2032   array = Bloc->a;
2033   row = A->rmap->rstart;
2034   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2035   cols_tmp = cols;
2036   for (i=0; i<mb; i++) {
2037     ncol = bi[i+1]-bi[i];
2038     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2039     row++; array += ncol; cols_tmp += ncol;
2040   }
2041   ierr = PetscFree(cols);CHKERRQ(ierr);
2042 
2043   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2046     *matout = B;
2047   } else {
2048     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2049   }
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2055 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2056 {
2057   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2058   Mat            a = aij->A,b = aij->B;
2059   PetscErrorCode ierr;
2060   PetscInt       s1,s2,s3;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2064   if (rr) {
2065     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2066     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2067     /* Overlap communication with computation. */
2068     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2069   }
2070   if (ll) {
2071     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2072     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2073     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2074   }
2075   /* scale  the diagonal block */
2076   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2077 
2078   if (rr) {
2079     /* Do a scatter end and then right scale the off-diagonal block */
2080     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2081     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2082   }
2083 
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2089 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2090 {
2091   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2096   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2097   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2098   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2099   PetscFunctionReturn(0);
2100 }
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2103 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2104 {
2105   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2106   PetscErrorCode ierr;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatEqual_MPIAIJ"
2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2116 {
2117   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2118   Mat            a,b,c,d;
2119   PetscBool      flg;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   a = matA->A; b = matA->B;
2124   c = matB->A; d = matB->B;
2125 
2126   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2127   if (flg) {
2128     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2129   }
2130   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatCopy_MPIAIJ"
2136 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2137 {
2138   PetscErrorCode ierr;
2139   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2140   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2141 
2142   PetscFunctionBegin;
2143   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2144   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2145     /* because of the column compression in the off-processor part of the matrix a->B,
2146        the number of columns in a->B and b->B may be different, hence we cannot call
2147        the MatCopy() directly on the two parts. If need be, we can provide a more
2148        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2149        then copying the submatrices */
2150     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2151   } else {
2152     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2153     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2154   }
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2160 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2161 {
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatAXPY_MPIAIJ"
2171 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2172 {
2173   PetscErrorCode ierr;
2174   PetscInt       i;
2175   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2176   PetscBLASInt   bnz,one=1;
2177   Mat_SeqAIJ     *x,*y;
2178 
2179   PetscFunctionBegin;
2180   if (str == SAME_NONZERO_PATTERN) {
2181     PetscScalar alpha = a;
2182     x = (Mat_SeqAIJ *)xx->A->data;
2183     y = (Mat_SeqAIJ *)yy->A->data;
2184     bnz = PetscBLASIntCast(x->nz);
2185     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2186     x = (Mat_SeqAIJ *)xx->B->data;
2187     y = (Mat_SeqAIJ *)yy->B->data;
2188     bnz = PetscBLASIntCast(x->nz);
2189     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2190   } else if (str == SUBSET_NONZERO_PATTERN) {
2191     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2192 
2193     x = (Mat_SeqAIJ *)xx->B->data;
2194     y = (Mat_SeqAIJ *)yy->B->data;
2195     if (y->xtoy && y->XtoY != xx->B) {
2196       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2197       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2198     }
2199     if (!y->xtoy) { /* get xtoy */
2200       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2201       y->XtoY = xx->B;
2202       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2203     }
2204     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2205   } else {
2206     Mat B;
2207     PetscInt *nnz_d,*nnz_o;
2208     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2209     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2210     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2211     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2212     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2213     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2214     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2215     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2216     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2217     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2218     ierr = MatHeaderReplace(Y,B);
2219     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2220     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2221   }
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatConjugate_MPIAIJ"
2229 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2230 {
2231 #if defined(PETSC_USE_COMPLEX)
2232   PetscErrorCode ierr;
2233   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2237   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2238 #else
2239   PetscFunctionBegin;
2240 #endif
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatRealPart_MPIAIJ"
2246 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2247 {
2248   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2249   PetscErrorCode ierr;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2253   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2259 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2260 {
2261   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2266   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #ifdef PETSC_HAVE_PBGL
2271 
2272 #include <boost/parallel/mpi/bsp_process_group.hpp>
2273 #include <boost/graph/distributed/ilu_default_graph.hpp>
2274 #include <boost/graph/distributed/ilu_0_block.hpp>
2275 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2276 #include <boost/graph/distributed/petsc/interface.hpp>
2277 #include <boost/multi_array.hpp>
2278 #include <boost/parallel/distributed_property_map->hpp>
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2282 /*
2283   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2284 */
2285 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2286 {
2287   namespace petsc = boost::distributed::petsc;
2288 
2289   namespace graph_dist = boost::graph::distributed;
2290   using boost::graph::distributed::ilu_default::process_group_type;
2291   using boost::graph::ilu_permuted;
2292 
2293   PetscBool       row_identity, col_identity;
2294   PetscContainer  c;
2295   PetscInt        m, n, M, N;
2296   PetscErrorCode  ierr;
2297 
2298   PetscFunctionBegin;
2299   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2300   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2301   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2302   if (!row_identity || !col_identity) {
2303     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2304   }
2305 
2306   process_group_type pg;
2307   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2308   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2309   lgraph_type&   level_graph = *lgraph_p;
2310   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2311 
2312   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2313   ilu_permuted(level_graph);
2314 
2315   /* put together the new matrix */
2316   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2317   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2318   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2319   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2320   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2321   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2322   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2323 
2324   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2325   ierr = PetscContainerSetPointer(c, lgraph_p);
2326   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2327   ierr = PetscContainerDestroy(&c);
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2333 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2334 {
2335   PetscFunctionBegin;
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatSolve_MPIAIJ"
2341 /*
2342   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2343 */
2344 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2345 {
2346   namespace graph_dist = boost::graph::distributed;
2347 
2348   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2349   lgraph_type*   lgraph_p;
2350   PetscContainer c;
2351   PetscErrorCode ierr;
2352 
2353   PetscFunctionBegin;
2354   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2355   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2356   ierr = VecCopy(b, x);CHKERRQ(ierr);
2357 
2358   PetscScalar* array_x;
2359   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2360   PetscInt sx;
2361   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2362 
2363   PetscScalar* array_b;
2364   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2365   PetscInt sb;
2366   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2367 
2368   lgraph_type&   level_graph = *lgraph_p;
2369   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2370 
2371   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2372   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2373                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2374 
2375   typedef boost::iterator_property_map<array_ref_type::iterator,
2376                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2377   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2378                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2379 
2380   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2381 
2382   PetscFunctionReturn(0);
2383 }
2384 #endif
2385 
2386 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2387   PetscInt       nzlocal,nsends,nrecvs;
2388   PetscMPIInt    *send_rank,*recv_rank;
2389   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2390   PetscScalar    *sbuf_a,**rbuf_a;
2391   PetscErrorCode (*Destroy)(Mat);
2392 } Mat_Redundant;
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2396 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2397 {
2398   PetscErrorCode       ierr;
2399   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2400   PetscInt             i;
2401 
2402   PetscFunctionBegin;
2403   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2404   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2405   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2406   for (i=0; i<redund->nrecvs; i++){
2407     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2408     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2409   }
2410   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2411   ierr = PetscFree(redund);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatDestroy_MatRedundant"
2417 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2418 {
2419   PetscErrorCode  ierr;
2420   PetscContainer  container;
2421   Mat_Redundant   *redund=PETSC_NULL;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2425   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2426   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2427   A->ops->destroy = redund->Destroy;
2428   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2429   if (A->ops->destroy) {
2430     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2431   }
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2437 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2438 {
2439   PetscMPIInt    rank,size;
2440   MPI_Comm       comm=((PetscObject)mat)->comm;
2441   PetscErrorCode ierr;
2442   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2443   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2444   PetscInt       *rowrange=mat->rmap->range;
2445   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2446   Mat            A=aij->A,B=aij->B,C=*matredundant;
2447   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2448   PetscScalar    *sbuf_a;
2449   PetscInt       nzlocal=a->nz+b->nz;
2450   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2451   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2452   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2453   MatScalar      *aworkA,*aworkB;
2454   PetscScalar    *vals;
2455   PetscMPIInt    tag1,tag2,tag3,imdex;
2456   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2457                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2458   MPI_Status     recv_status,*send_status;
2459   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2460   PetscInt       **rbuf_j=PETSC_NULL;
2461   PetscScalar    **rbuf_a=PETSC_NULL;
2462   Mat_Redundant  *redund=PETSC_NULL;
2463   PetscContainer container;
2464 
2465   PetscFunctionBegin;
2466   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2468 
2469   if (reuse == MAT_REUSE_MATRIX) {
2470     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2471     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2472     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2473     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2474     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2475     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2476     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2477     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2478 
2479     nsends    = redund->nsends;
2480     nrecvs    = redund->nrecvs;
2481     send_rank = redund->send_rank;
2482     recv_rank = redund->recv_rank;
2483     sbuf_nz   = redund->sbuf_nz;
2484     rbuf_nz   = redund->rbuf_nz;
2485     sbuf_j    = redund->sbuf_j;
2486     sbuf_a    = redund->sbuf_a;
2487     rbuf_j    = redund->rbuf_j;
2488     rbuf_a    = redund->rbuf_a;
2489   }
2490 
2491   if (reuse == MAT_INITIAL_MATRIX){
2492     PetscMPIInt  subrank,subsize;
2493     PetscInt     nleftover,np_subcomm;
2494     /* get the destination processors' id send_rank, nsends and nrecvs */
2495     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2496     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2497     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2498     np_subcomm = size/nsubcomm;
2499     nleftover  = size - nsubcomm*np_subcomm;
2500     nsends = 0; nrecvs = 0;
2501     for (i=0; i<size; i++){ /* i=rank*/
2502       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2503         send_rank[nsends] = i; nsends++;
2504         recv_rank[nrecvs++] = i;
2505       }
2506     }
2507     if (rank >= size - nleftover){/* this proc is a leftover processor */
2508       i = size-nleftover-1;
2509       j = 0;
2510       while (j < nsubcomm - nleftover){
2511         send_rank[nsends++] = i;
2512         i--; j++;
2513       }
2514     }
2515 
2516     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2517       for (i=0; i<nleftover; i++){
2518         recv_rank[nrecvs++] = size-nleftover+i;
2519       }
2520     }
2521 
2522     /* allocate sbuf_j, sbuf_a */
2523     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2524     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2525     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2526   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2527 
2528   /* copy mat's local entries into the buffers */
2529   if (reuse == MAT_INITIAL_MATRIX){
2530     rownz_max = 0;
2531     rptr = sbuf_j;
2532     cols = sbuf_j + rend-rstart + 1;
2533     vals = sbuf_a;
2534     rptr[0] = 0;
2535     for (i=0; i<rend-rstart; i++){
2536       row = i + rstart;
2537       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2538       ncols  = nzA + nzB;
2539       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2540       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2541       /* load the column indices for this row into cols */
2542       lwrite = 0;
2543       for (l=0; l<nzB; l++) {
2544         if ((ctmp = bmap[cworkB[l]]) < cstart){
2545           vals[lwrite]   = aworkB[l];
2546           cols[lwrite++] = ctmp;
2547         }
2548       }
2549       for (l=0; l<nzA; l++){
2550         vals[lwrite]   = aworkA[l];
2551         cols[lwrite++] = cstart + cworkA[l];
2552       }
2553       for (l=0; l<nzB; l++) {
2554         if ((ctmp = bmap[cworkB[l]]) >= cend){
2555           vals[lwrite]   = aworkB[l];
2556           cols[lwrite++] = ctmp;
2557         }
2558       }
2559       vals += ncols;
2560       cols += ncols;
2561       rptr[i+1] = rptr[i] + ncols;
2562       if (rownz_max < ncols) rownz_max = ncols;
2563     }
2564     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2565   } else { /* only copy matrix values into sbuf_a */
2566     rptr = sbuf_j;
2567     vals = sbuf_a;
2568     rptr[0] = 0;
2569     for (i=0; i<rend-rstart; i++){
2570       row = i + rstart;
2571       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2572       ncols  = nzA + nzB;
2573       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2574       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2575       lwrite = 0;
2576       for (l=0; l<nzB; l++) {
2577         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2578       }
2579       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2580       for (l=0; l<nzB; l++) {
2581         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2582       }
2583       vals += ncols;
2584       rptr[i+1] = rptr[i] + ncols;
2585     }
2586   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2587 
2588   /* send nzlocal to others, and recv other's nzlocal */
2589   /*--------------------------------------------------*/
2590   if (reuse == MAT_INITIAL_MATRIX){
2591     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2592     s_waits2 = s_waits3 + nsends;
2593     s_waits1 = s_waits2 + nsends;
2594     r_waits1 = s_waits1 + nsends;
2595     r_waits2 = r_waits1 + nrecvs;
2596     r_waits3 = r_waits2 + nrecvs;
2597   } else {
2598     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2599     r_waits3 = s_waits3 + nsends;
2600   }
2601 
2602   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2603   if (reuse == MAT_INITIAL_MATRIX){
2604     /* get new tags to keep the communication clean */
2605     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2606     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2607     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2608 
2609     /* post receives of other's nzlocal */
2610     for (i=0; i<nrecvs; i++){
2611       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2612     }
2613     /* send nzlocal to others */
2614     for (i=0; i<nsends; i++){
2615       sbuf_nz[i] = nzlocal;
2616       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2617     }
2618     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2619     count = nrecvs;
2620     while (count) {
2621       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2622       recv_rank[imdex] = recv_status.MPI_SOURCE;
2623       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2624       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2625 
2626       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2627       rbuf_nz[imdex] += i + 2;
2628       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2629       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2630       count--;
2631     }
2632     /* wait on sends of nzlocal */
2633     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2634     /* send mat->i,j to others, and recv from other's */
2635     /*------------------------------------------------*/
2636     for (i=0; i<nsends; i++){
2637       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2638       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2639     }
2640     /* wait on receives of mat->i,j */
2641     /*------------------------------*/
2642     count = nrecvs;
2643     while (count) {
2644       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2645       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2646       count--;
2647     }
2648     /* wait on sends of mat->i,j */
2649     /*---------------------------*/
2650     if (nsends) {
2651       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2652     }
2653   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2654 
2655   /* post receives, send and receive mat->a */
2656   /*----------------------------------------*/
2657   for (imdex=0; imdex<nrecvs; imdex++) {
2658     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2659   }
2660   for (i=0; i<nsends; i++){
2661     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2662   }
2663   count = nrecvs;
2664   while (count) {
2665     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2666     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2667     count--;
2668   }
2669   if (nsends) {
2670     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2671   }
2672 
2673   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2674 
2675   /* create redundant matrix */
2676   /*-------------------------*/
2677   if (reuse == MAT_INITIAL_MATRIX){
2678     /* compute rownz_max for preallocation */
2679     for (imdex=0; imdex<nrecvs; imdex++){
2680       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2681       rptr = rbuf_j[imdex];
2682       for (i=0; i<j; i++){
2683         ncols = rptr[i+1] - rptr[i];
2684         if (rownz_max < ncols) rownz_max = ncols;
2685       }
2686     }
2687 
2688     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2689     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2690     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2691     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2692     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2693   } else {
2694     C = *matredundant;
2695   }
2696 
2697   /* insert local matrix entries */
2698   rptr = sbuf_j;
2699   cols = sbuf_j + rend-rstart + 1;
2700   vals = sbuf_a;
2701   for (i=0; i<rend-rstart; i++){
2702     row   = i + rstart;
2703     ncols = rptr[i+1] - rptr[i];
2704     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2705     vals += ncols;
2706     cols += ncols;
2707   }
2708   /* insert received matrix entries */
2709   for (imdex=0; imdex<nrecvs; imdex++){
2710     rstart = rowrange[recv_rank[imdex]];
2711     rend   = rowrange[recv_rank[imdex]+1];
2712     rptr = rbuf_j[imdex];
2713     cols = rbuf_j[imdex] + rend-rstart + 1;
2714     vals = rbuf_a[imdex];
2715     for (i=0; i<rend-rstart; i++){
2716       row   = i + rstart;
2717       ncols = rptr[i+1] - rptr[i];
2718       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2719       vals += ncols;
2720       cols += ncols;
2721     }
2722   }
2723   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2726   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2727   if (reuse == MAT_INITIAL_MATRIX) {
2728     PetscContainer container;
2729     *matredundant = C;
2730     /* create a supporting struct and attach it to C for reuse */
2731     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2732     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2733     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2734     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2735     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2736     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2737 
2738     redund->nzlocal = nzlocal;
2739     redund->nsends  = nsends;
2740     redund->nrecvs  = nrecvs;
2741     redund->send_rank = send_rank;
2742     redund->recv_rank = recv_rank;
2743     redund->sbuf_nz = sbuf_nz;
2744     redund->rbuf_nz = rbuf_nz;
2745     redund->sbuf_j  = sbuf_j;
2746     redund->sbuf_a  = sbuf_a;
2747     redund->rbuf_j  = rbuf_j;
2748     redund->rbuf_a  = rbuf_a;
2749 
2750     redund->Destroy = C->ops->destroy;
2751     C->ops->destroy = MatDestroy_MatRedundant;
2752   }
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2758 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2759 {
2760   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2761   PetscErrorCode ierr;
2762   PetscInt       i,*idxb = 0;
2763   PetscScalar    *va,*vb;
2764   Vec            vtmp;
2765 
2766   PetscFunctionBegin;
2767   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2768   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2769   if (idx) {
2770     for (i=0; i<A->rmap->n; i++) {
2771       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2772     }
2773   }
2774 
2775   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2776   if (idx) {
2777     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2778   }
2779   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2780   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2781 
2782   for (i=0; i<A->rmap->n; i++){
2783     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2784       va[i] = vb[i];
2785       if (idx) idx[i] = a->garray[idxb[i]];
2786     }
2787   }
2788 
2789   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2790   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2791   ierr = PetscFree(idxb);CHKERRQ(ierr);
2792   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2798 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2799 {
2800   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2801   PetscErrorCode ierr;
2802   PetscInt       i,*idxb = 0;
2803   PetscScalar    *va,*vb;
2804   Vec            vtmp;
2805 
2806   PetscFunctionBegin;
2807   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2808   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2809   if (idx) {
2810     for (i=0; i<A->cmap->n; i++) {
2811       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2812     }
2813   }
2814 
2815   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2816   if (idx) {
2817     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2818   }
2819   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2820   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2821 
2822   for (i=0; i<A->rmap->n; i++){
2823     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2824       va[i] = vb[i];
2825       if (idx) idx[i] = a->garray[idxb[i]];
2826     }
2827   }
2828 
2829   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2830   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2831   ierr = PetscFree(idxb);CHKERRQ(ierr);
2832   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2838 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2839 {
2840   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2841   PetscInt       n      = A->rmap->n;
2842   PetscInt       cstart = A->cmap->rstart;
2843   PetscInt      *cmap   = mat->garray;
2844   PetscInt      *diagIdx, *offdiagIdx;
2845   Vec            diagV, offdiagV;
2846   PetscScalar   *a, *diagA, *offdiagA;
2847   PetscInt       r;
2848   PetscErrorCode ierr;
2849 
2850   PetscFunctionBegin;
2851   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2852   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2853   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2854   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2855   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2856   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2857   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2858   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2859   for(r = 0; r < n; ++r) {
2860     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2861       a[r]   = diagA[r];
2862       idx[r] = cstart + diagIdx[r];
2863     } else {
2864       a[r]   = offdiagA[r];
2865       idx[r] = cmap[offdiagIdx[r]];
2866     }
2867   }
2868   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2869   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2870   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2871   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2872   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2873   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2874   PetscFunctionReturn(0);
2875 }
2876 
2877 #undef __FUNCT__
2878 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2879 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2880 {
2881   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2882   PetscInt       n      = A->rmap->n;
2883   PetscInt       cstart = A->cmap->rstart;
2884   PetscInt      *cmap   = mat->garray;
2885   PetscInt      *diagIdx, *offdiagIdx;
2886   Vec            diagV, offdiagV;
2887   PetscScalar   *a, *diagA, *offdiagA;
2888   PetscInt       r;
2889   PetscErrorCode ierr;
2890 
2891   PetscFunctionBegin;
2892   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2893   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2894   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2895   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2896   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2897   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2898   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2899   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2900   for(r = 0; r < n; ++r) {
2901     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2902       a[r]   = diagA[r];
2903       idx[r] = cstart + diagIdx[r];
2904     } else {
2905       a[r]   = offdiagA[r];
2906       idx[r] = cmap[offdiagIdx[r]];
2907     }
2908   }
2909   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2910   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2911   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2912   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2913   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2914   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2915   PetscFunctionReturn(0);
2916 }
2917 
2918 #undef __FUNCT__
2919 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2920 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2921 {
2922   PetscErrorCode ierr;
2923   Mat            *dummy;
2924 
2925   PetscFunctionBegin;
2926   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2927   *newmat = *dummy;
2928   ierr = PetscFree(dummy);CHKERRQ(ierr);
2929   PetscFunctionReturn(0);
2930 }
2931 
2932 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2933 /* -------------------------------------------------------------------*/
2934 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2935        MatGetRow_MPIAIJ,
2936        MatRestoreRow_MPIAIJ,
2937        MatMult_MPIAIJ,
2938 /* 4*/ MatMultAdd_MPIAIJ,
2939        MatMultTranspose_MPIAIJ,
2940        MatMultTransposeAdd_MPIAIJ,
2941 #ifdef PETSC_HAVE_PBGL
2942        MatSolve_MPIAIJ,
2943 #else
2944        0,
2945 #endif
2946        0,
2947        0,
2948 /*10*/ 0,
2949        0,
2950        0,
2951        MatSOR_MPIAIJ,
2952        MatTranspose_MPIAIJ,
2953 /*15*/ MatGetInfo_MPIAIJ,
2954        MatEqual_MPIAIJ,
2955        MatGetDiagonal_MPIAIJ,
2956        MatDiagonalScale_MPIAIJ,
2957        MatNorm_MPIAIJ,
2958 /*20*/ MatAssemblyBegin_MPIAIJ,
2959        MatAssemblyEnd_MPIAIJ,
2960        MatSetOption_MPIAIJ,
2961        MatZeroEntries_MPIAIJ,
2962 /*24*/ MatZeroRows_MPIAIJ,
2963        0,
2964 #ifdef PETSC_HAVE_PBGL
2965        0,
2966 #else
2967        0,
2968 #endif
2969        0,
2970        0,
2971 /*29*/ MatSetUpPreallocation_MPIAIJ,
2972 #ifdef PETSC_HAVE_PBGL
2973        0,
2974 #else
2975        0,
2976 #endif
2977        0,
2978        0,
2979        0,
2980 /*34*/ MatDuplicate_MPIAIJ,
2981        0,
2982        0,
2983        0,
2984        0,
2985 /*39*/ MatAXPY_MPIAIJ,
2986        MatGetSubMatrices_MPIAIJ,
2987        MatIncreaseOverlap_MPIAIJ,
2988        MatGetValues_MPIAIJ,
2989        MatCopy_MPIAIJ,
2990 /*44*/ MatGetRowMax_MPIAIJ,
2991        MatScale_MPIAIJ,
2992        0,
2993        0,
2994        MatZeroRowsColumns_MPIAIJ,
2995 /*49*/ MatSetBlockSize_MPIAIJ,
2996        0,
2997        0,
2998        0,
2999        0,
3000 /*54*/ MatFDColoringCreate_MPIAIJ,
3001        0,
3002        MatSetUnfactored_MPIAIJ,
3003        MatPermute_MPIAIJ,
3004        0,
3005 /*59*/ MatGetSubMatrix_MPIAIJ,
3006        MatDestroy_MPIAIJ,
3007        MatView_MPIAIJ,
3008        0,
3009        0,
3010 /*64*/ 0,
3011        0,
3012        0,
3013        0,
3014        0,
3015 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3016        MatGetRowMinAbs_MPIAIJ,
3017        0,
3018        MatSetColoring_MPIAIJ,
3019 #if defined(PETSC_HAVE_ADIC)
3020        MatSetValuesAdic_MPIAIJ,
3021 #else
3022        0,
3023 #endif
3024        MatSetValuesAdifor_MPIAIJ,
3025 /*75*/ MatFDColoringApply_AIJ,
3026        0,
3027        0,
3028        0,
3029        0,
3030 /*80*/ 0,
3031        0,
3032        0,
3033 /*83*/ MatLoad_MPIAIJ,
3034        0,
3035        0,
3036        0,
3037        0,
3038        0,
3039 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3040        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3041        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3042        MatPtAP_Basic,
3043        MatPtAPSymbolic_MPIAIJ,
3044 /*94*/ MatPtAPNumeric_MPIAIJ,
3045        0,
3046        0,
3047        0,
3048        0,
3049 /*99*/ 0,
3050        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3051        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3052        MatConjugate_MPIAIJ,
3053        0,
3054 /*104*/MatSetValuesRow_MPIAIJ,
3055        MatRealPart_MPIAIJ,
3056        MatImaginaryPart_MPIAIJ,
3057        0,
3058        0,
3059 /*109*/0,
3060        MatGetRedundantMatrix_MPIAIJ,
3061        MatGetRowMin_MPIAIJ,
3062        0,
3063        0,
3064 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3065        0,
3066        0,
3067        0,
3068        0,
3069 /*119*/0,
3070        0,
3071        0,
3072        0,
3073        MatGetMultiProcBlock_MPIAIJ,
3074 /*124*/MatFindNonZeroRows_MPIAIJ,
3075        MatGetColumnNorms_MPIAIJ,
3076        0,
3077        0,
3078        MatGetSubMatricesParallel_MPIAIJ
3079 };
3080 
3081 /* ----------------------------------------------------------------------------------------*/
3082 
3083 EXTERN_C_BEGIN
3084 #undef __FUNCT__
3085 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3086 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3087 {
3088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3089   PetscErrorCode ierr;
3090 
3091   PetscFunctionBegin;
3092   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3093   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 EXTERN_C_END
3097 
3098 EXTERN_C_BEGIN
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3101 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3102 {
3103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3104   PetscErrorCode ierr;
3105 
3106   PetscFunctionBegin;
3107   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3108   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3109   PetscFunctionReturn(0);
3110 }
3111 EXTERN_C_END
3112 
3113 EXTERN_C_BEGIN
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3116 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3117 {
3118   Mat_MPIAIJ     *b;
3119   PetscErrorCode ierr;
3120   PetscInt       i;
3121 
3122   PetscFunctionBegin;
3123   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3124   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3125   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3126   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3127 
3128   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3129   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3130   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3131   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3132   if (d_nnz) {
3133     for (i=0; i<B->rmap->n; i++) {
3134       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3135     }
3136   }
3137   if (o_nnz) {
3138     for (i=0; i<B->rmap->n; i++) {
3139       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3140     }
3141   }
3142   b = (Mat_MPIAIJ*)B->data;
3143 
3144   if (!B->preallocated) {
3145     /* Explicitly create 2 MATSEQAIJ matrices. */
3146     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3147     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3148     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3149     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3150     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3151     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3152     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3153     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3154   }
3155 
3156   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3157   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3158   B->preallocated = PETSC_TRUE;
3159   PetscFunctionReturn(0);
3160 }
3161 EXTERN_C_END
3162 
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3165 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3166 {
3167   Mat            mat;
3168   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3169   PetscErrorCode ierr;
3170 
3171   PetscFunctionBegin;
3172   *newmat       = 0;
3173   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3174   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3175   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3176   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3177   a    = (Mat_MPIAIJ*)mat->data;
3178 
3179   mat->factortype    = matin->factortype;
3180   mat->rmap->bs      = matin->rmap->bs;
3181   mat->assembled    = PETSC_TRUE;
3182   mat->insertmode   = NOT_SET_VALUES;
3183   mat->preallocated = PETSC_TRUE;
3184 
3185   a->size           = oldmat->size;
3186   a->rank           = oldmat->rank;
3187   a->donotstash     = oldmat->donotstash;
3188   a->roworiented    = oldmat->roworiented;
3189   a->rowindices     = 0;
3190   a->rowvalues      = 0;
3191   a->getrowactive   = PETSC_FALSE;
3192 
3193   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3194   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3195 
3196   if (oldmat->colmap) {
3197 #if defined (PETSC_USE_CTABLE)
3198     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3199 #else
3200     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3201     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3202     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3203 #endif
3204   } else a->colmap = 0;
3205   if (oldmat->garray) {
3206     PetscInt len;
3207     len  = oldmat->B->cmap->n;
3208     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3209     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3210     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3211   } else a->garray = 0;
3212 
3213   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3214   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3215   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3216   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3217   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3218   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3219   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3220   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3221   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3222   *newmat = mat;
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 /*
3227     Allows sending/receiving larger messages then 2 gigabytes in a single call
3228 */
3229 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3230 {
3231   int             ierr;
3232   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3233   PetscInt        i,numchunks;
3234   PetscMPIInt     icnt;
3235 
3236   numchunks = cnt/CHUNKSIZE + 1;
3237   for (i=0; i<numchunks; i++) {
3238     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3239     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3240     if (type == MPIU_INT) {
3241       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3242     } else if (type == MPIU_SCALAR) {
3243       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3244     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3245   }
3246   return 0;
3247 }
3248 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3249 {
3250   int             ierr;
3251   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3252   MPI_Status      status;
3253   PetscInt        i,numchunks;
3254   PetscMPIInt     icnt;
3255 
3256   numchunks = cnt/CHUNKSIZE + 1;
3257   for (i=0; i<numchunks; i++) {
3258     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3259     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3260     if (type == MPIU_INT) {
3261       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3262     } else if (type == MPIU_SCALAR) {
3263       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3264     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3265   }
3266   return 0;
3267 }
3268 
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatLoad_MPIAIJ"
3271 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3272 {
3273   PetscScalar    *vals,*svals;
3274   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3275   PetscErrorCode ierr;
3276   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3277   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3278   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3279   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3280   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3281   int            fd;
3282 
3283   PetscFunctionBegin;
3284   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3285   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3286   if (!rank) {
3287     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3288     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3289     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3290   }
3291 
3292   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3293 
3294   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3295   M = header[1]; N = header[2];
3296   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3297   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3298   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3299 
3300   /* If global sizes are set, check if they are consistent with that given in the file */
3301   if (sizesset) {
3302     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3303   }
3304   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3305   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3306 
3307   /* determine ownership of all rows */
3308   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3309   else m = newMat->rmap->n; /* Set by user */
3310 
3311   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3312   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3313 
3314   /* First process needs enough room for process with most rows */
3315   if (!rank) {
3316     mmax       = rowners[1];
3317     for (i=2; i<size; i++) {
3318       mmax = PetscMax(mmax,rowners[i]);
3319     }
3320   } else mmax = m;
3321 
3322   rowners[0] = 0;
3323   for (i=2; i<=size; i++) {
3324     rowners[i] += rowners[i-1];
3325   }
3326   rstart = rowners[rank];
3327   rend   = rowners[rank+1];
3328 
3329   /* distribute row lengths to all processors */
3330   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3331   if (!rank) {
3332     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3333     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3334     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3335     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3336     for (j=0; j<m; j++) {
3337       procsnz[0] += ourlens[j];
3338     }
3339     for (i=1; i<size; i++) {
3340       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3341       /* calculate the number of nonzeros on each processor */
3342       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3343         procsnz[i] += rowlengths[j];
3344       }
3345       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3346     }
3347     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3348   } else {
3349     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3350   }
3351 
3352   if (!rank) {
3353     /* determine max buffer needed and allocate it */
3354     maxnz = 0;
3355     for (i=0; i<size; i++) {
3356       maxnz = PetscMax(maxnz,procsnz[i]);
3357     }
3358     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3359 
3360     /* read in my part of the matrix column indices  */
3361     nz   = procsnz[0];
3362     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3363     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3364 
3365     /* read in every one elses and ship off */
3366     for (i=1; i<size; i++) {
3367       nz     = procsnz[i];
3368       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3369       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3370     }
3371     ierr = PetscFree(cols);CHKERRQ(ierr);
3372   } else {
3373     /* determine buffer space needed for message */
3374     nz = 0;
3375     for (i=0; i<m; i++) {
3376       nz += ourlens[i];
3377     }
3378     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3379 
3380     /* receive message of column indices*/
3381     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3382   }
3383 
3384   /* determine column ownership if matrix is not square */
3385   if (N != M) {
3386     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3387     else n = newMat->cmap->n;
3388     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3389     cstart = cend - n;
3390   } else {
3391     cstart = rstart;
3392     cend   = rend;
3393     n      = cend - cstart;
3394   }
3395 
3396   /* loop over local rows, determining number of off diagonal entries */
3397   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3398   jj = 0;
3399   for (i=0; i<m; i++) {
3400     for (j=0; j<ourlens[i]; j++) {
3401       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3402       jj++;
3403     }
3404   }
3405 
3406   for (i=0; i<m; i++) {
3407     ourlens[i] -= offlens[i];
3408   }
3409   if (!sizesset) {
3410     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3411   }
3412   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] += offlens[i];
3416   }
3417 
3418   if (!rank) {
3419     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3420 
3421     /* read in my part of the matrix numerical values  */
3422     nz   = procsnz[0];
3423     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3424 
3425     /* insert into matrix */
3426     jj      = rstart;
3427     smycols = mycols;
3428     svals   = vals;
3429     for (i=0; i<m; i++) {
3430       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3431       smycols += ourlens[i];
3432       svals   += ourlens[i];
3433       jj++;
3434     }
3435 
3436     /* read in other processors and ship out */
3437     for (i=1; i<size; i++) {
3438       nz     = procsnz[i];
3439       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3440       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3441     }
3442     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3443   } else {
3444     /* receive numeric values */
3445     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3446 
3447     /* receive message of values*/
3448     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449 
3450     /* insert into matrix */
3451     jj      = rstart;
3452     smycols = mycols;
3453     svals   = vals;
3454     for (i=0; i<m; i++) {
3455       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3456       smycols += ourlens[i];
3457       svals   += ourlens[i];
3458       jj++;
3459     }
3460   }
3461   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3462   ierr = PetscFree(vals);CHKERRQ(ierr);
3463   ierr = PetscFree(mycols);CHKERRQ(ierr);
3464   ierr = PetscFree(rowners);CHKERRQ(ierr);
3465 
3466   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3467   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468   PetscFunctionReturn(0);
3469 }
3470 
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3473 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3474 {
3475   PetscErrorCode ierr;
3476   IS             iscol_local;
3477   PetscInt       csize;
3478 
3479   PetscFunctionBegin;
3480   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3481   if (call == MAT_REUSE_MATRIX) {
3482     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3483     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3484   } else {
3485     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3486   }
3487   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3488   if (call == MAT_INITIAL_MATRIX) {
3489     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3490     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3491   }
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3497 /*
3498     Not great since it makes two copies of the submatrix, first an SeqAIJ
3499   in local and then by concatenating the local matrices the end result.
3500   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3501 
3502   Note: This requires a sequential iscol with all indices.
3503 */
3504 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3505 {
3506   PetscErrorCode ierr;
3507   PetscMPIInt    rank,size;
3508   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3509   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3510   Mat            *local,M,Mreuse;
3511   MatScalar      *vwork,*aa;
3512   MPI_Comm       comm = ((PetscObject)mat)->comm;
3513   Mat_SeqAIJ     *aij;
3514 
3515 
3516   PetscFunctionBegin;
3517   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3518   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3519 
3520   if (call ==  MAT_REUSE_MATRIX) {
3521     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3522     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3523     local = &Mreuse;
3524     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3525   } else {
3526     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3527     Mreuse = *local;
3528     ierr   = PetscFree(local);CHKERRQ(ierr);
3529   }
3530 
3531   /*
3532       m - number of local rows
3533       n - number of columns (same on all processors)
3534       rstart - first row in new global matrix generated
3535   */
3536   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3537   if (call == MAT_INITIAL_MATRIX) {
3538     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3539     ii  = aij->i;
3540     jj  = aij->j;
3541 
3542     /*
3543         Determine the number of non-zeros in the diagonal and off-diagonal
3544         portions of the matrix in order to do correct preallocation
3545     */
3546 
3547     /* first get start and end of "diagonal" columns */
3548     if (csize == PETSC_DECIDE) {
3549       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3550       if (mglobal == n) { /* square matrix */
3551 	nlocal = m;
3552       } else {
3553         nlocal = n/size + ((n % size) > rank);
3554       }
3555     } else {
3556       nlocal = csize;
3557     }
3558     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3559     rstart = rend - nlocal;
3560     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3561 
3562     /* next, compute all the lengths */
3563     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3564     olens = dlens + m;
3565     for (i=0; i<m; i++) {
3566       jend = ii[i+1] - ii[i];
3567       olen = 0;
3568       dlen = 0;
3569       for (j=0; j<jend; j++) {
3570         if (*jj < rstart || *jj >= rend) olen++;
3571         else dlen++;
3572         jj++;
3573       }
3574       olens[i] = olen;
3575       dlens[i] = dlen;
3576     }
3577     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3578     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3579     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3580     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3581     ierr = PetscFree(dlens);CHKERRQ(ierr);
3582   } else {
3583     PetscInt ml,nl;
3584 
3585     M = *newmat;
3586     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3587     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3588     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3589     /*
3590          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3591        rather than the slower MatSetValues().
3592     */
3593     M->was_assembled = PETSC_TRUE;
3594     M->assembled     = PETSC_FALSE;
3595   }
3596   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3597   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3598   ii  = aij->i;
3599   jj  = aij->j;
3600   aa  = aij->a;
3601   for (i=0; i<m; i++) {
3602     row   = rstart + i;
3603     nz    = ii[i+1] - ii[i];
3604     cwork = jj;     jj += nz;
3605     vwork = aa;     aa += nz;
3606     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3607   }
3608 
3609   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   *newmat = M;
3612 
3613   /* save submatrix used in processor for next request */
3614   if (call ==  MAT_INITIAL_MATRIX) {
3615     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3616     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3617   }
3618 
3619   PetscFunctionReturn(0);
3620 }
3621 
3622 EXTERN_C_BEGIN
3623 #undef __FUNCT__
3624 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3625 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3626 {
3627   PetscInt       m,cstart, cend,j,nnz,i,d;
3628   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3629   const PetscInt *JJ;
3630   PetscScalar    *values;
3631   PetscErrorCode ierr;
3632 
3633   PetscFunctionBegin;
3634   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3635 
3636   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3637   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3638   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3639   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3640   m      = B->rmap->n;
3641   cstart = B->cmap->rstart;
3642   cend   = B->cmap->rend;
3643   rstart = B->rmap->rstart;
3644 
3645   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3646 
3647 #if defined(PETSC_USE_DEBUGGING)
3648   for (i=0; i<m; i++) {
3649     nnz     = Ii[i+1]- Ii[i];
3650     JJ      = J + Ii[i];
3651     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3652     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3653     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3654   }
3655 #endif
3656 
3657   for (i=0; i<m; i++) {
3658     nnz     = Ii[i+1]- Ii[i];
3659     JJ      = J + Ii[i];
3660     nnz_max = PetscMax(nnz_max,nnz);
3661     d       = 0;
3662     for (j=0; j<nnz; j++) {
3663       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3664     }
3665     d_nnz[i] = d;
3666     o_nnz[i] = nnz - d;
3667   }
3668   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3669   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3670 
3671   if (v) values = (PetscScalar*)v;
3672   else {
3673     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3674     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3675   }
3676 
3677   for (i=0; i<m; i++) {
3678     ii   = i + rstart;
3679     nnz  = Ii[i+1]- Ii[i];
3680     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3681   }
3682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684 
3685   if (!v) {
3686     ierr = PetscFree(values);CHKERRQ(ierr);
3687   }
3688   PetscFunctionReturn(0);
3689 }
3690 EXTERN_C_END
3691 
3692 #undef __FUNCT__
3693 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3694 /*@
3695    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3696    (the default parallel PETSc format).
3697 
3698    Collective on MPI_Comm
3699 
3700    Input Parameters:
3701 +  B - the matrix
3702 .  i - the indices into j for the start of each local row (starts with zero)
3703 .  j - the column indices for each local row (starts with zero)
3704 -  v - optional values in the matrix
3705 
3706    Level: developer
3707 
3708    Notes:
3709        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3710      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3711      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3712 
3713        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3714 
3715        The format which is used for the sparse matrix input, is equivalent to a
3716     row-major ordering.. i.e for the following matrix, the input data expected is
3717     as shown:
3718 
3719         1 0 0
3720         2 0 3     P0
3721        -------
3722         4 5 6     P1
3723 
3724      Process0 [P0]: rows_owned=[0,1]
3725         i =  {0,1,3}  [size = nrow+1  = 2+1]
3726         j =  {0,0,2}  [size = nz = 6]
3727         v =  {1,2,3}  [size = nz = 6]
3728 
3729      Process1 [P1]: rows_owned=[2]
3730         i =  {0,3}    [size = nrow+1  = 1+1]
3731         j =  {0,1,2}  [size = nz = 6]
3732         v =  {4,5,6}  [size = nz = 6]
3733 
3734 .keywords: matrix, aij, compressed row, sparse, parallel
3735 
3736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3737           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3738 @*/
3739 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3740 {
3741   PetscErrorCode ierr;
3742 
3743   PetscFunctionBegin;
3744   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 #undef __FUNCT__
3749 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3750 /*@C
3751    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3752    (the default parallel PETSc format).  For good matrix assembly performance
3753    the user should preallocate the matrix storage by setting the parameters
3754    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3755    performance can be increased by more than a factor of 50.
3756 
3757    Collective on MPI_Comm
3758 
3759    Input Parameters:
3760 +  A - the matrix
3761 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3762            (same value is used for all local rows)
3763 .  d_nnz - array containing the number of nonzeros in the various rows of the
3764            DIAGONAL portion of the local submatrix (possibly different for each row)
3765            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3766            The size of this array is equal to the number of local rows, i.e 'm'.
3767            You must leave room for the diagonal entry even if it is zero.
3768 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3769            submatrix (same value is used for all local rows).
3770 -  o_nnz - array containing the number of nonzeros in the various rows of the
3771            OFF-DIAGONAL portion of the local submatrix (possibly different for
3772            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3773            structure. The size of this array is equal to the number
3774            of local rows, i.e 'm'.
3775 
3776    If the *_nnz parameter is given then the *_nz parameter is ignored
3777 
3778    The AIJ format (also called the Yale sparse matrix format or
3779    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3780    storage.  The stored row and column indices begin with zero.
3781    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3782 
3783    The parallel matrix is partitioned such that the first m0 rows belong to
3784    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3785    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3786 
3787    The DIAGONAL portion of the local submatrix of a processor can be defined
3788    as the submatrix which is obtained by extraction the part corresponding to
3789    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3790    first row that belongs to the processor, r2 is the last row belonging to
3791    the this processor, and c1-c2 is range of indices of the local part of a
3792    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3793    common case of a square matrix, the row and column ranges are the same and
3794    the DIAGONAL part is also square. The remaining portion of the local
3795    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3796 
3797    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3798 
3799    You can call MatGetInfo() to get information on how effective the preallocation was;
3800    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3801    You can also run with the option -info and look for messages with the string
3802    malloc in them to see if additional memory allocation was needed.
3803 
3804    Example usage:
3805 
3806    Consider the following 8x8 matrix with 34 non-zero values, that is
3807    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3808    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3809    as follows:
3810 
3811 .vb
3812             1  2  0  |  0  3  0  |  0  4
3813     Proc0   0  5  6  |  7  0  0  |  8  0
3814             9  0 10  | 11  0  0  | 12  0
3815     -------------------------------------
3816            13  0 14  | 15 16 17  |  0  0
3817     Proc1   0 18  0  | 19 20 21  |  0  0
3818             0  0  0  | 22 23  0  | 24  0
3819     -------------------------------------
3820     Proc2  25 26 27  |  0  0 28  | 29  0
3821            30  0  0  | 31 32 33  |  0 34
3822 .ve
3823 
3824    This can be represented as a collection of submatrices as:
3825 
3826 .vb
3827       A B C
3828       D E F
3829       G H I
3830 .ve
3831 
3832    Where the submatrices A,B,C are owned by proc0, D,E,F are
3833    owned by proc1, G,H,I are owned by proc2.
3834 
3835    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3836    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3837    The 'M','N' parameters are 8,8, and have the same values on all procs.
3838 
3839    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3840    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3841    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3842    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3843    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3844    matrix, ans [DF] as another SeqAIJ matrix.
3845 
3846    When d_nz, o_nz parameters are specified, d_nz storage elements are
3847    allocated for every row of the local diagonal submatrix, and o_nz
3848    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3849    One way to choose d_nz and o_nz is to use the max nonzerors per local
3850    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3851    In this case, the values of d_nz,o_nz are:
3852 .vb
3853      proc0 : dnz = 2, o_nz = 2
3854      proc1 : dnz = 3, o_nz = 2
3855      proc2 : dnz = 1, o_nz = 4
3856 .ve
3857    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3858    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3859    for proc3. i.e we are using 12+15+10=37 storage locations to store
3860    34 values.
3861 
3862    When d_nnz, o_nnz parameters are specified, the storage is specified
3863    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3864    In the above case the values for d_nnz,o_nnz are:
3865 .vb
3866      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3867      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3868      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3869 .ve
3870    Here the space allocated is sum of all the above values i.e 34, and
3871    hence pre-allocation is perfect.
3872 
3873    Level: intermediate
3874 
3875 .keywords: matrix, aij, compressed row, sparse, parallel
3876 
3877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3878           MPIAIJ, MatGetInfo()
3879 @*/
3880 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3881 {
3882   PetscErrorCode ierr;
3883 
3884   PetscFunctionBegin;
3885   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 #undef __FUNCT__
3890 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3891 /*@
3892      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3893          CSR format the local rows.
3894 
3895    Collective on MPI_Comm
3896 
3897    Input Parameters:
3898 +  comm - MPI communicator
3899 .  m - number of local rows (Cannot be PETSC_DECIDE)
3900 .  n - This value should be the same as the local size used in creating the
3901        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3902        calculated if N is given) For square matrices n is almost always m.
3903 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3904 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3905 .   i - row indices
3906 .   j - column indices
3907 -   a - matrix values
3908 
3909    Output Parameter:
3910 .   mat - the matrix
3911 
3912    Level: intermediate
3913 
3914    Notes:
3915        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3916      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3917      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3918 
3919        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3920 
3921        The format which is used for the sparse matrix input, is equivalent to a
3922     row-major ordering.. i.e for the following matrix, the input data expected is
3923     as shown:
3924 
3925         1 0 0
3926         2 0 3     P0
3927        -------
3928         4 5 6     P1
3929 
3930      Process0 [P0]: rows_owned=[0,1]
3931         i =  {0,1,3}  [size = nrow+1  = 2+1]
3932         j =  {0,0,2}  [size = nz = 6]
3933         v =  {1,2,3}  [size = nz = 6]
3934 
3935      Process1 [P1]: rows_owned=[2]
3936         i =  {0,3}    [size = nrow+1  = 1+1]
3937         j =  {0,1,2}  [size = nz = 6]
3938         v =  {4,5,6}  [size = nz = 6]
3939 
3940 .keywords: matrix, aij, compressed row, sparse, parallel
3941 
3942 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3943           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3944 @*/
3945 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3946 {
3947   PetscErrorCode ierr;
3948 
3949  PetscFunctionBegin;
3950   if (i[0]) {
3951     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3952   }
3953   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3954   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3955   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3956   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3957   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3958   PetscFunctionReturn(0);
3959 }
3960 
3961 #undef __FUNCT__
3962 #define __FUNCT__ "MatCreateMPIAIJ"
3963 /*@C
3964    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3965    (the default parallel PETSc format).  For good matrix assembly performance
3966    the user should preallocate the matrix storage by setting the parameters
3967    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3968    performance can be increased by more than a factor of 50.
3969 
3970    Collective on MPI_Comm
3971 
3972    Input Parameters:
3973 +  comm - MPI communicator
3974 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3975            This value should be the same as the local size used in creating the
3976            y vector for the matrix-vector product y = Ax.
3977 .  n - This value should be the same as the local size used in creating the
3978        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3979        calculated if N is given) For square matrices n is almost always m.
3980 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3981 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3982 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3983            (same value is used for all local rows)
3984 .  d_nnz - array containing the number of nonzeros in the various rows of the
3985            DIAGONAL portion of the local submatrix (possibly different for each row)
3986            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3987            The size of this array is equal to the number of local rows, i.e 'm'.
3988            You must leave room for the diagonal entry even if it is zero.
3989 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3990            submatrix (same value is used for all local rows).
3991 -  o_nnz - array containing the number of nonzeros in the various rows of the
3992            OFF-DIAGONAL portion of the local submatrix (possibly different for
3993            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3994            structure. The size of this array is equal to the number
3995            of local rows, i.e 'm'.
3996 
3997    Output Parameter:
3998 .  A - the matrix
3999 
4000    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4001    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4002    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4003 
4004    Notes:
4005    If the *_nnz parameter is given then the *_nz parameter is ignored
4006 
4007    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4008    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4009    storage requirements for this matrix.
4010 
4011    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4012    processor than it must be used on all processors that share the object for
4013    that argument.
4014 
4015    The user MUST specify either the local or global matrix dimensions
4016    (possibly both).
4017 
4018    The parallel matrix is partitioned across processors such that the
4019    first m0 rows belong to process 0, the next m1 rows belong to
4020    process 1, the next m2 rows belong to process 2 etc.. where
4021    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4022    values corresponding to [m x N] submatrix.
4023 
4024    The columns are logically partitioned with the n0 columns belonging
4025    to 0th partition, the next n1 columns belonging to the next
4026    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4027 
4028    The DIAGONAL portion of the local submatrix on any given processor
4029    is the submatrix corresponding to the rows and columns m,n
4030    corresponding to the given processor. i.e diagonal matrix on
4031    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4032    etc. The remaining portion of the local submatrix [m x (N-n)]
4033    constitute the OFF-DIAGONAL portion. The example below better
4034    illustrates this concept.
4035 
4036    For a square global matrix we define each processor's diagonal portion
4037    to be its local rows and the corresponding columns (a square submatrix);
4038    each processor's off-diagonal portion encompasses the remainder of the
4039    local matrix (a rectangular submatrix).
4040 
4041    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4042 
4043    When calling this routine with a single process communicator, a matrix of
4044    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4045    type of communicator, use the construction mechanism:
4046      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4047 
4048    By default, this format uses inodes (identical nodes) when possible.
4049    We search for consecutive rows with the same nonzero structure, thereby
4050    reusing matrix information to achieve increased efficiency.
4051 
4052    Options Database Keys:
4053 +  -mat_no_inode  - Do not use inodes
4054 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4055 -  -mat_aij_oneindex - Internally use indexing starting at 1
4056         rather than 0.  Note that when calling MatSetValues(),
4057         the user still MUST index entries starting at 0!
4058 
4059 
4060    Example usage:
4061 
4062    Consider the following 8x8 matrix with 34 non-zero values, that is
4063    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4064    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4065    as follows:
4066 
4067 .vb
4068             1  2  0  |  0  3  0  |  0  4
4069     Proc0   0  5  6  |  7  0  0  |  8  0
4070             9  0 10  | 11  0  0  | 12  0
4071     -------------------------------------
4072            13  0 14  | 15 16 17  |  0  0
4073     Proc1   0 18  0  | 19 20 21  |  0  0
4074             0  0  0  | 22 23  0  | 24  0
4075     -------------------------------------
4076     Proc2  25 26 27  |  0  0 28  | 29  0
4077            30  0  0  | 31 32 33  |  0 34
4078 .ve
4079 
4080    This can be represented as a collection of submatrices as:
4081 
4082 .vb
4083       A B C
4084       D E F
4085       G H I
4086 .ve
4087 
4088    Where the submatrices A,B,C are owned by proc0, D,E,F are
4089    owned by proc1, G,H,I are owned by proc2.
4090 
4091    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4092    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4093    The 'M','N' parameters are 8,8, and have the same values on all procs.
4094 
4095    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4096    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4097    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4098    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4099    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4100    matrix, ans [DF] as another SeqAIJ matrix.
4101 
4102    When d_nz, o_nz parameters are specified, d_nz storage elements are
4103    allocated for every row of the local diagonal submatrix, and o_nz
4104    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4105    One way to choose d_nz and o_nz is to use the max nonzerors per local
4106    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4107    In this case, the values of d_nz,o_nz are:
4108 .vb
4109      proc0 : dnz = 2, o_nz = 2
4110      proc1 : dnz = 3, o_nz = 2
4111      proc2 : dnz = 1, o_nz = 4
4112 .ve
4113    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4114    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4115    for proc3. i.e we are using 12+15+10=37 storage locations to store
4116    34 values.
4117 
4118    When d_nnz, o_nnz parameters are specified, the storage is specified
4119    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4120    In the above case the values for d_nnz,o_nnz are:
4121 .vb
4122      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4123      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4124      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4125 .ve
4126    Here the space allocated is sum of all the above values i.e 34, and
4127    hence pre-allocation is perfect.
4128 
4129    Level: intermediate
4130 
4131 .keywords: matrix, aij, compressed row, sparse, parallel
4132 
4133 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4134           MPIAIJ, MatCreateMPIAIJWithArrays()
4135 @*/
4136 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4137 {
4138   PetscErrorCode ierr;
4139   PetscMPIInt    size;
4140 
4141   PetscFunctionBegin;
4142   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4143   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4144   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4145   if (size > 1) {
4146     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4147     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4148   } else {
4149     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4150     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4151   }
4152   PetscFunctionReturn(0);
4153 }
4154 
4155 #undef __FUNCT__
4156 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4157 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4158 {
4159   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4160 
4161   PetscFunctionBegin;
4162   *Ad     = a->A;
4163   *Ao     = a->B;
4164   *colmap = a->garray;
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 #undef __FUNCT__
4169 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4170 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4171 {
4172   PetscErrorCode ierr;
4173   PetscInt       i;
4174   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4175 
4176   PetscFunctionBegin;
4177   if (coloring->ctype == IS_COLORING_GLOBAL) {
4178     ISColoringValue *allcolors,*colors;
4179     ISColoring      ocoloring;
4180 
4181     /* set coloring for diagonal portion */
4182     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4183 
4184     /* set coloring for off-diagonal portion */
4185     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4186     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4187     for (i=0; i<a->B->cmap->n; i++) {
4188       colors[i] = allcolors[a->garray[i]];
4189     }
4190     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4191     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4192     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4193     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4194   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4195     ISColoringValue *colors;
4196     PetscInt        *larray;
4197     ISColoring      ocoloring;
4198 
4199     /* set coloring for diagonal portion */
4200     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4201     for (i=0; i<a->A->cmap->n; i++) {
4202       larray[i] = i + A->cmap->rstart;
4203     }
4204     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4205     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4206     for (i=0; i<a->A->cmap->n; i++) {
4207       colors[i] = coloring->colors[larray[i]];
4208     }
4209     ierr = PetscFree(larray);CHKERRQ(ierr);
4210     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4211     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4212     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4213 
4214     /* set coloring for off-diagonal portion */
4215     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4216     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4217     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4218     for (i=0; i<a->B->cmap->n; i++) {
4219       colors[i] = coloring->colors[larray[i]];
4220     }
4221     ierr = PetscFree(larray);CHKERRQ(ierr);
4222     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4223     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4224     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4225   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4226 
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 #if defined(PETSC_HAVE_ADIC)
4231 #undef __FUNCT__
4232 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4233 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4234 {
4235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4236   PetscErrorCode ierr;
4237 
4238   PetscFunctionBegin;
4239   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4240   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4241   PetscFunctionReturn(0);
4242 }
4243 #endif
4244 
4245 #undef __FUNCT__
4246 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4247 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4248 {
4249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4250   PetscErrorCode ierr;
4251 
4252   PetscFunctionBegin;
4253   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4254   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4255   PetscFunctionReturn(0);
4256 }
4257 
4258 #undef __FUNCT__
4259 #define __FUNCT__ "MatMerge"
4260 /*@
4261       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4262                  matrices from each processor
4263 
4264     Collective on MPI_Comm
4265 
4266    Input Parameters:
4267 +    comm - the communicators the parallel matrix will live on
4268 .    inmat - the input sequential matrices
4269 .    n - number of local columns (or PETSC_DECIDE)
4270 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4271 
4272    Output Parameter:
4273 .    outmat - the parallel matrix generated
4274 
4275     Level: advanced
4276 
4277    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4278 
4279 @*/
4280 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4281 {
4282   PetscErrorCode ierr;
4283   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4284   PetscInt       *indx;
4285   PetscScalar    *values;
4286 
4287   PetscFunctionBegin;
4288   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4289   if (scall == MAT_INITIAL_MATRIX){
4290     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4291     if (n == PETSC_DECIDE){
4292       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4293     }
4294     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4295     rstart -= m;
4296 
4297     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4298     for (i=0;i<m;i++) {
4299       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4300       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4301       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4302     }
4303     /* This routine will ONLY return MPIAIJ type matrix */
4304     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4305     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4306     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4307     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4308     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4309 
4310   } else if (scall == MAT_REUSE_MATRIX){
4311     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4312   } else {
4313     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4314   }
4315 
4316   for (i=0;i<m;i++) {
4317     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4318     Ii    = i + rstart;
4319     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4320     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4321   }
4322   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4323   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4324   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4325 
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatFileSplit"
4331 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4332 {
4333   PetscErrorCode    ierr;
4334   PetscMPIInt       rank;
4335   PetscInt          m,N,i,rstart,nnz;
4336   size_t            len;
4337   const PetscInt    *indx;
4338   PetscViewer       out;
4339   char              *name;
4340   Mat               B;
4341   const PetscScalar *values;
4342 
4343   PetscFunctionBegin;
4344   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4345   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4346   /* Should this be the type of the diagonal block of A? */
4347   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4348   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4349   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4350   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4351   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4352   for (i=0;i<m;i++) {
4353     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4354     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4355     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4356   }
4357   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4358   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4359 
4360   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4361   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4362   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4363   sprintf(name,"%s.%d",outfile,rank);
4364   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4365   ierr = PetscFree(name);
4366   ierr = MatView(B,out);CHKERRQ(ierr);
4367   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4368   ierr = MatDestroy(&B);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4373 #undef __FUNCT__
4374 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4375 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4376 {
4377   PetscErrorCode       ierr;
4378   Mat_Merge_SeqsToMPI  *merge;
4379   PetscContainer       container;
4380 
4381   PetscFunctionBegin;
4382   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4383   if (container) {
4384     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4385     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4386     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4387     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4388     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4389     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4390     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4391     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4392     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4393     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4395     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4396     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4397     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4398     ierr = PetscFree(merge);CHKERRQ(ierr);
4399     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4400   }
4401   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4402   PetscFunctionReturn(0);
4403 }
4404 
4405 #include <../src/mat/utils/freespace.h>
4406 #include <petscbt.h>
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4410 /*@C
4411       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4412                  matrices from each processor
4413 
4414     Collective on MPI_Comm
4415 
4416    Input Parameters:
4417 +    comm - the communicators the parallel matrix will live on
4418 .    seqmat - the input sequential matrices
4419 .    m - number of local rows (or PETSC_DECIDE)
4420 .    n - number of local columns (or PETSC_DECIDE)
4421 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4422 
4423    Output Parameter:
4424 .    mpimat - the parallel matrix generated
4425 
4426     Level: advanced
4427 
4428    Notes:
4429      The dimensions of the sequential matrix in each processor MUST be the same.
4430      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4431      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4432 @*/
4433 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4434 {
4435   PetscErrorCode       ierr;
4436   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4437   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4438   PetscMPIInt          size,rank,taga,*len_s;
4439   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4440   PetscInt             proc,m;
4441   PetscInt             **buf_ri,**buf_rj;
4442   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4443   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4444   MPI_Request          *s_waits,*r_waits;
4445   MPI_Status           *status;
4446   MatScalar            *aa=a->a;
4447   MatScalar            **abuf_r,*ba_i;
4448   Mat_Merge_SeqsToMPI  *merge;
4449   PetscContainer       container;
4450 
4451   PetscFunctionBegin;
4452   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4453 
4454   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4455   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4456 
4457   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4458   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4459 
4460   bi     = merge->bi;
4461   bj     = merge->bj;
4462   buf_ri = merge->buf_ri;
4463   buf_rj = merge->buf_rj;
4464 
4465   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4466   owners = merge->rowmap->range;
4467   len_s  = merge->len_s;
4468 
4469   /* send and recv matrix values */
4470   /*-----------------------------*/
4471   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4472   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4473 
4474   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4475   for (proc=0,k=0; proc<size; proc++){
4476     if (!len_s[proc]) continue;
4477     i = owners[proc];
4478     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4479     k++;
4480   }
4481 
4482   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4483   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4484   ierr = PetscFree(status);CHKERRQ(ierr);
4485 
4486   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4487   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4488 
4489   /* insert mat values of mpimat */
4490   /*----------------------------*/
4491   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4492   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4493 
4494   for (k=0; k<merge->nrecv; k++){
4495     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4496     nrows = *(buf_ri_k[k]);
4497     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4498     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4499   }
4500 
4501   /* set values of ba */
4502   m = merge->rowmap->n;
4503   for (i=0; i<m; i++) {
4504     arow = owners[rank] + i;
4505     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4506     bnzi = bi[i+1] - bi[i];
4507     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4508 
4509     /* add local non-zero vals of this proc's seqmat into ba */
4510     anzi = ai[arow+1] - ai[arow];
4511     aj   = a->j + ai[arow];
4512     aa   = a->a + ai[arow];
4513     nextaj = 0;
4514     for (j=0; nextaj<anzi; j++){
4515       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4516         ba_i[j] += aa[nextaj++];
4517       }
4518     }
4519 
4520     /* add received vals into ba */
4521     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4522       /* i-th row */
4523       if (i == *nextrow[k]) {
4524         anzi = *(nextai[k]+1) - *nextai[k];
4525         aj   = buf_rj[k] + *(nextai[k]);
4526         aa   = abuf_r[k] + *(nextai[k]);
4527         nextaj = 0;
4528         for (j=0; nextaj<anzi; j++){
4529           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4530             ba_i[j] += aa[nextaj++];
4531           }
4532         }
4533         nextrow[k]++; nextai[k]++;
4534       }
4535     }
4536     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4537   }
4538   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4539   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4540 
4541   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4542   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4543   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4544   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4545   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4546   PetscFunctionReturn(0);
4547 }
4548 
4549 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4550 
4551 #undef __FUNCT__
4552 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4553 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4554 {
4555   PetscErrorCode       ierr;
4556   Mat                  B_mpi;
4557   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4558   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4559   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4560   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4561   PetscInt             len,proc,*dnz,*onz;
4562   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4563   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4564   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4565   MPI_Status           *status;
4566   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4567   PetscBT              lnkbt;
4568   Mat_Merge_SeqsToMPI  *merge;
4569   PetscContainer       container;
4570 
4571   PetscFunctionBegin;
4572   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4573 
4574   /* make sure it is a PETSc comm */
4575   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4576   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4577   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4578 
4579   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4580   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4581 
4582   /* determine row ownership */
4583   /*---------------------------------------------------------*/
4584   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4585   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4586   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4587   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4588   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4589   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4590   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4591 
4592   m      = merge->rowmap->n;
4593   M      = merge->rowmap->N;
4594   owners = merge->rowmap->range;
4595 
4596   /* determine the number of messages to send, their lengths */
4597   /*---------------------------------------------------------*/
4598   len_s  = merge->len_s;
4599 
4600   len = 0;  /* length of buf_si[] */
4601   merge->nsend = 0;
4602   for (proc=0; proc<size; proc++){
4603     len_si[proc] = 0;
4604     if (proc == rank){
4605       len_s[proc] = 0;
4606     } else {
4607       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4608       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4609     }
4610     if (len_s[proc]) {
4611       merge->nsend++;
4612       nrows = 0;
4613       for (i=owners[proc]; i<owners[proc+1]; i++){
4614         if (ai[i+1] > ai[i]) nrows++;
4615       }
4616       len_si[proc] = 2*(nrows+1);
4617       len += len_si[proc];
4618     }
4619   }
4620 
4621   /* determine the number and length of messages to receive for ij-structure */
4622   /*-------------------------------------------------------------------------*/
4623   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4624   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4625 
4626   /* post the Irecv of j-structure */
4627   /*-------------------------------*/
4628   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4629   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4630 
4631   /* post the Isend of j-structure */
4632   /*--------------------------------*/
4633   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4634 
4635   for (proc=0, k=0; proc<size; proc++){
4636     if (!len_s[proc]) continue;
4637     i = owners[proc];
4638     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4639     k++;
4640   }
4641 
4642   /* receives and sends of j-structure are complete */
4643   /*------------------------------------------------*/
4644   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4645   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4646 
4647   /* send and recv i-structure */
4648   /*---------------------------*/
4649   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4650   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4651 
4652   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4653   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4654   for (proc=0,k=0; proc<size; proc++){
4655     if (!len_s[proc]) continue;
4656     /* form outgoing message for i-structure:
4657          buf_si[0]:                 nrows to be sent
4658                [1:nrows]:           row index (global)
4659                [nrows+1:2*nrows+1]: i-structure index
4660     */
4661     /*-------------------------------------------*/
4662     nrows = len_si[proc]/2 - 1;
4663     buf_si_i    = buf_si + nrows+1;
4664     buf_si[0]   = nrows;
4665     buf_si_i[0] = 0;
4666     nrows = 0;
4667     for (i=owners[proc]; i<owners[proc+1]; i++){
4668       anzi = ai[i+1] - ai[i];
4669       if (anzi) {
4670         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4671         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4672         nrows++;
4673       }
4674     }
4675     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4676     k++;
4677     buf_si += len_si[proc];
4678   }
4679 
4680   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4681   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4682 
4683   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4684   for (i=0; i<merge->nrecv; i++){
4685     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4686   }
4687 
4688   ierr = PetscFree(len_si);CHKERRQ(ierr);
4689   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4690   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4691   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4692   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4693   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4694   ierr = PetscFree(status);CHKERRQ(ierr);
4695 
4696   /* compute a local seq matrix in each processor */
4697   /*----------------------------------------------*/
4698   /* allocate bi array and free space for accumulating nonzero column info */
4699   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4700   bi[0] = 0;
4701 
4702   /* create and initialize a linked list */
4703   nlnk = N+1;
4704   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4705 
4706   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4707   len = 0;
4708   len  = ai[owners[rank+1]] - ai[owners[rank]];
4709   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4710   current_space = free_space;
4711 
4712   /* determine symbolic info for each local row */
4713   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4714 
4715   for (k=0; k<merge->nrecv; k++){
4716     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4717     nrows = *buf_ri_k[k];
4718     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4719     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4720   }
4721 
4722   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4723   len = 0;
4724   for (i=0;i<m;i++) {
4725     bnzi   = 0;
4726     /* add local non-zero cols of this proc's seqmat into lnk */
4727     arow   = owners[rank] + i;
4728     anzi   = ai[arow+1] - ai[arow];
4729     aj     = a->j + ai[arow];
4730     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4731     bnzi += nlnk;
4732     /* add received col data into lnk */
4733     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4734       if (i == *nextrow[k]) { /* i-th row */
4735         anzi = *(nextai[k]+1) - *nextai[k];
4736         aj   = buf_rj[k] + *nextai[k];
4737         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4738         bnzi += nlnk;
4739         nextrow[k]++; nextai[k]++;
4740       }
4741     }
4742     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4743 
4744     /* if free space is not available, make more free space */
4745     if (current_space->local_remaining<bnzi) {
4746       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4747       nspacedouble++;
4748     }
4749     /* copy data into free space, then initialize lnk */
4750     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4751     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4752 
4753     current_space->array           += bnzi;
4754     current_space->local_used      += bnzi;
4755     current_space->local_remaining -= bnzi;
4756 
4757     bi[i+1] = bi[i] + bnzi;
4758   }
4759 
4760   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4761 
4762   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4763   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4764   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4765 
4766   /* create symbolic parallel matrix B_mpi */
4767   /*---------------------------------------*/
4768   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4769   if (n==PETSC_DECIDE) {
4770     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4771   } else {
4772     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4773   }
4774   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4775   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4776   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4777 
4778   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4779   B_mpi->assembled     = PETSC_FALSE;
4780   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4781   merge->bi            = bi;
4782   merge->bj            = bj;
4783   merge->buf_ri        = buf_ri;
4784   merge->buf_rj        = buf_rj;
4785   merge->coi           = PETSC_NULL;
4786   merge->coj           = PETSC_NULL;
4787   merge->owners_co     = PETSC_NULL;
4788 
4789   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4790 
4791   /* attach the supporting struct to B_mpi for reuse */
4792   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4793   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4794   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4795   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4796   *mpimat = B_mpi;
4797 
4798   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4799   PetscFunctionReturn(0);
4800 }
4801 
4802 #undef __FUNCT__
4803 #define __FUNCT__ "MatMerge_SeqsToMPI"
4804 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4805 {
4806   PetscErrorCode   ierr;
4807 
4808   PetscFunctionBegin;
4809   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4810   if (scall == MAT_INITIAL_MATRIX){
4811     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4812   }
4813   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4814   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4815   PetscFunctionReturn(0);
4816 }
4817 
4818 #undef __FUNCT__
4819 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4820 /*@
4821      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4822           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4823           with MatGetSize()
4824 
4825     Not Collective
4826 
4827    Input Parameters:
4828 +    A - the matrix
4829 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4830 
4831    Output Parameter:
4832 .    A_loc - the local sequential matrix generated
4833 
4834     Level: developer
4835 
4836 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4837 
4838 @*/
4839 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4840 {
4841   PetscErrorCode  ierr;
4842   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4843   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4844   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4845   MatScalar       *aa=a->a,*ba=b->a,*cam;
4846   PetscScalar     *ca;
4847   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4848   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4849   PetscBool       match;
4850 
4851   PetscFunctionBegin;
4852   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4853   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4854   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4855   if (scall == MAT_INITIAL_MATRIX){
4856     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4857     ci[0] = 0;
4858     for (i=0; i<am; i++){
4859       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4860     }
4861     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4862     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4863     k = 0;
4864     for (i=0; i<am; i++) {
4865       ncols_o = bi[i+1] - bi[i];
4866       ncols_d = ai[i+1] - ai[i];
4867       /* off-diagonal portion of A */
4868       for (jo=0; jo<ncols_o; jo++) {
4869         col = cmap[*bj];
4870         if (col >= cstart) break;
4871         cj[k]   = col; bj++;
4872         ca[k++] = *ba++;
4873       }
4874       /* diagonal portion of A */
4875       for (j=0; j<ncols_d; j++) {
4876         cj[k]   = cstart + *aj++;
4877         ca[k++] = *aa++;
4878       }
4879       /* off-diagonal portion of A */
4880       for (j=jo; j<ncols_o; j++) {
4881         cj[k]   = cmap[*bj++];
4882         ca[k++] = *ba++;
4883       }
4884     }
4885     /* put together the new matrix */
4886     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4887     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4888     /* Since these are PETSc arrays, change flags to free them as necessary. */
4889     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4890     mat->free_a  = PETSC_TRUE;
4891     mat->free_ij = PETSC_TRUE;
4892     mat->nonew   = 0;
4893   } else if (scall == MAT_REUSE_MATRIX){
4894     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4895     ci = mat->i; cj = mat->j; cam = mat->a;
4896     for (i=0; i<am; i++) {
4897       /* off-diagonal portion of A */
4898       ncols_o = bi[i+1] - bi[i];
4899       for (jo=0; jo<ncols_o; jo++) {
4900         col = cmap[*bj];
4901         if (col >= cstart) break;
4902         *cam++ = *ba++; bj++;
4903       }
4904       /* diagonal portion of A */
4905       ncols_d = ai[i+1] - ai[i];
4906       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4907       /* off-diagonal portion of A */
4908       for (j=jo; j<ncols_o; j++) {
4909         *cam++ = *ba++; bj++;
4910       }
4911     }
4912   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4913   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4914   PetscFunctionReturn(0);
4915 }
4916 
4917 #undef __FUNCT__
4918 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4919 /*@C
4920      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4921 
4922     Not Collective
4923 
4924    Input Parameters:
4925 +    A - the matrix
4926 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4927 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4928 
4929    Output Parameter:
4930 .    A_loc - the local sequential matrix generated
4931 
4932     Level: developer
4933 
4934 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4935 
4936 @*/
4937 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4938 {
4939   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4940   PetscErrorCode    ierr;
4941   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4942   IS                isrowa,iscola;
4943   Mat               *aloc;
4944   PetscBool       match;
4945 
4946   PetscFunctionBegin;
4947   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4948   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4949   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4950   if (!row){
4951     start = A->rmap->rstart; end = A->rmap->rend;
4952     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4953   } else {
4954     isrowa = *row;
4955   }
4956   if (!col){
4957     start = A->cmap->rstart;
4958     cmap  = a->garray;
4959     nzA   = a->A->cmap->n;
4960     nzB   = a->B->cmap->n;
4961     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4962     ncols = 0;
4963     for (i=0; i<nzB; i++) {
4964       if (cmap[i] < start) idx[ncols++] = cmap[i];
4965       else break;
4966     }
4967     imark = i;
4968     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4969     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4970     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4971   } else {
4972     iscola = *col;
4973   }
4974   if (scall != MAT_INITIAL_MATRIX){
4975     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4976     aloc[0] = *A_loc;
4977   }
4978   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4979   *A_loc = aloc[0];
4980   ierr = PetscFree(aloc);CHKERRQ(ierr);
4981   if (!row){
4982     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4983   }
4984   if (!col){
4985     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4986   }
4987   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 #undef __FUNCT__
4992 #define __FUNCT__ "MatGetBrowsOfAcols"
4993 /*@C
4994     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4995 
4996     Collective on Mat
4997 
4998    Input Parameters:
4999 +    A,B - the matrices in mpiaij format
5000 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5001 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5002 
5003    Output Parameter:
5004 +    rowb, colb - index sets of rows and columns of B to extract
5005 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
5006 -    B_seq - the sequential matrix generated
5007 
5008     Level: developer
5009 
5010 @*/
5011 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5012 {
5013   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5014   PetscErrorCode    ierr;
5015   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5016   IS                isrowb,iscolb;
5017   Mat               *bseq;
5018 
5019   PetscFunctionBegin;
5020   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5021     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5022   }
5023   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5024 
5025   if (scall == MAT_INITIAL_MATRIX){
5026     start = A->cmap->rstart;
5027     cmap  = a->garray;
5028     nzA   = a->A->cmap->n;
5029     nzB   = a->B->cmap->n;
5030     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5031     ncols = 0;
5032     for (i=0; i<nzB; i++) {  /* row < local row index */
5033       if (cmap[i] < start) idx[ncols++] = cmap[i];
5034       else break;
5035     }
5036     imark = i;
5037     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5038     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5039     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5040     *brstart = imark;
5041     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5042   } else {
5043     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5044     isrowb = *rowb; iscolb = *colb;
5045     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5046     bseq[0] = *B_seq;
5047   }
5048   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5049   *B_seq = bseq[0];
5050   ierr = PetscFree(bseq);CHKERRQ(ierr);
5051   if (!rowb){
5052     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5053   } else {
5054     *rowb = isrowb;
5055   }
5056   if (!colb){
5057     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5058   } else {
5059     *colb = iscolb;
5060   }
5061   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
5064 
5065 #undef __FUNCT__
5066 #define __FUNCT__ "MatGetBrowsOfAoCols"
5067 /*@C
5068     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5069     of the OFF-DIAGONAL portion of local A
5070 
5071     Collective on Mat
5072 
5073    Input Parameters:
5074 +    A,B - the matrices in mpiaij format
5075 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5076 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5077 .    startsj_r - similar to startsj for receives
5078 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5079 
5080    Output Parameter:
5081 +    B_oth - the sequential matrix generated
5082 
5083     Level: developer
5084 
5085 @*/
5086 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5087 {
5088   VecScatter_MPI_General *gen_to,*gen_from;
5089   PetscErrorCode         ierr;
5090   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5091   Mat_SeqAIJ             *b_oth;
5092   VecScatter             ctx=a->Mvctx;
5093   MPI_Comm               comm=((PetscObject)ctx)->comm;
5094   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5095   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5096   PetscScalar            *rvalues,*svalues;
5097   MatScalar              *b_otha,*bufa,*bufA;
5098   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5099   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5100   MPI_Status             *sstatus,rstatus;
5101   PetscMPIInt            jj;
5102   PetscInt               *cols,sbs,rbs;
5103   PetscScalar            *vals;
5104 
5105   PetscFunctionBegin;
5106   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5107     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5108   }
5109   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5110   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5111 
5112   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5113   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5114   rvalues  = gen_from->values; /* holds the length of receiving row */
5115   svalues  = gen_to->values;   /* holds the length of sending row */
5116   nrecvs   = gen_from->n;
5117   nsends   = gen_to->n;
5118 
5119   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5120   srow     = gen_to->indices;   /* local row index to be sent */
5121   sstarts  = gen_to->starts;
5122   sprocs   = gen_to->procs;
5123   sstatus  = gen_to->sstatus;
5124   sbs      = gen_to->bs;
5125   rstarts  = gen_from->starts;
5126   rprocs   = gen_from->procs;
5127   rbs      = gen_from->bs;
5128 
5129   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5130   if (scall == MAT_INITIAL_MATRIX){
5131     /* i-array */
5132     /*---------*/
5133     /*  post receives */
5134     for (i=0; i<nrecvs; i++){
5135       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5136       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5137       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5138     }
5139 
5140     /* pack the outgoing message */
5141     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5142     sstartsj[0] = 0;  rstartsj[0] = 0;
5143     len = 0; /* total length of j or a array to be sent */
5144     k = 0;
5145     for (i=0; i<nsends; i++){
5146       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5147       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5148       for (j=0; j<nrows; j++) {
5149         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5150         for (l=0; l<sbs; l++){
5151           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5152           rowlen[j*sbs+l] = ncols;
5153           len += ncols;
5154           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5155         }
5156         k++;
5157       }
5158       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5159       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5160     }
5161     /* recvs and sends of i-array are completed */
5162     i = nrecvs;
5163     while (i--) {
5164       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5165     }
5166     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5167 
5168     /* allocate buffers for sending j and a arrays */
5169     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5170     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5171 
5172     /* create i-array of B_oth */
5173     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5174     b_othi[0] = 0;
5175     len = 0; /* total length of j or a array to be received */
5176     k = 0;
5177     for (i=0; i<nrecvs; i++){
5178       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5179       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5180       for (j=0; j<nrows; j++) {
5181         b_othi[k+1] = b_othi[k] + rowlen[j];
5182         len += rowlen[j]; k++;
5183       }
5184       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5185     }
5186 
5187     /* allocate space for j and a arrrays of B_oth */
5188     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5189     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5190 
5191     /* j-array */
5192     /*---------*/
5193     /*  post receives of j-array */
5194     for (i=0; i<nrecvs; i++){
5195       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5196       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5197     }
5198 
5199     /* pack the outgoing message j-array */
5200     k = 0;
5201     for (i=0; i<nsends; i++){
5202       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5203       bufJ = bufj+sstartsj[i];
5204       for (j=0; j<nrows; j++) {
5205         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5206         for (ll=0; ll<sbs; ll++){
5207           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5208           for (l=0; l<ncols; l++){
5209             *bufJ++ = cols[l];
5210           }
5211           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5212         }
5213       }
5214       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5215     }
5216 
5217     /* recvs and sends of j-array are completed */
5218     i = nrecvs;
5219     while (i--) {
5220       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5221     }
5222     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5223   } else if (scall == MAT_REUSE_MATRIX){
5224     sstartsj = *startsj;
5225     rstartsj = *startsj_r;
5226     bufa     = *bufa_ptr;
5227     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5228     b_otha   = b_oth->a;
5229   } else {
5230     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5231   }
5232 
5233   /* a-array */
5234   /*---------*/
5235   /*  post receives of a-array */
5236   for (i=0; i<nrecvs; i++){
5237     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5238     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5239   }
5240 
5241   /* pack the outgoing message a-array */
5242   k = 0;
5243   for (i=0; i<nsends; i++){
5244     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5245     bufA = bufa+sstartsj[i];
5246     for (j=0; j<nrows; j++) {
5247       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5248       for (ll=0; ll<sbs; ll++){
5249         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5250         for (l=0; l<ncols; l++){
5251           *bufA++ = vals[l];
5252         }
5253         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5254       }
5255     }
5256     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5257   }
5258   /* recvs and sends of a-array are completed */
5259   i = nrecvs;
5260   while (i--) {
5261     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5262   }
5263   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5264   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5265 
5266   if (scall == MAT_INITIAL_MATRIX){
5267     /* put together the new matrix */
5268     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5269 
5270     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5271     /* Since these are PETSc arrays, change flags to free them as necessary. */
5272     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5273     b_oth->free_a  = PETSC_TRUE;
5274     b_oth->free_ij = PETSC_TRUE;
5275     b_oth->nonew   = 0;
5276 
5277     ierr = PetscFree(bufj);CHKERRQ(ierr);
5278     if (!startsj || !bufa_ptr){
5279       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5280       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5281     } else {
5282       *startsj   = sstartsj;
5283       *startsj_r = rstartsj;
5284       *bufa_ptr  = bufa;
5285     }
5286   }
5287   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5288   PetscFunctionReturn(0);
5289 }
5290 
5291 #undef __FUNCT__
5292 #define __FUNCT__ "MatGetCommunicationStructs"
5293 /*@C
5294   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5295 
5296   Not Collective
5297 
5298   Input Parameters:
5299 . A - The matrix in mpiaij format
5300 
5301   Output Parameter:
5302 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5303 . colmap - A map from global column index to local index into lvec
5304 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5305 
5306   Level: developer
5307 
5308 @*/
5309 #if defined (PETSC_USE_CTABLE)
5310 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5311 #else
5312 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5313 #endif
5314 {
5315   Mat_MPIAIJ *a;
5316 
5317   PetscFunctionBegin;
5318   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5319   PetscValidPointer(lvec, 2);
5320   PetscValidPointer(colmap, 3);
5321   PetscValidPointer(multScatter, 4);
5322   a = (Mat_MPIAIJ *) A->data;
5323   if (lvec) *lvec = a->lvec;
5324   if (colmap) *colmap = a->colmap;
5325   if (multScatter) *multScatter = a->Mvctx;
5326   PetscFunctionReturn(0);
5327 }
5328 
5329 EXTERN_C_BEGIN
5330 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5331 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5332 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5333 EXTERN_C_END
5334 
5335 #undef __FUNCT__
5336 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5337 /*
5338     Computes (B'*A')' since computing B*A directly is untenable
5339 
5340                n                       p                          p
5341         (              )       (              )         (                  )
5342       m (      A       )  *  n (       B      )   =   m (         C        )
5343         (              )       (              )         (                  )
5344 
5345 */
5346 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5347 {
5348   PetscErrorCode     ierr;
5349   Mat                At,Bt,Ct;
5350 
5351   PetscFunctionBegin;
5352   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5353   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5354   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5355   ierr = MatDestroy(&At);CHKERRQ(ierr);
5356   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5357   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5358   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5359   PetscFunctionReturn(0);
5360 }
5361 
5362 #undef __FUNCT__
5363 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5364 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5365 {
5366   PetscErrorCode ierr;
5367   PetscInt       m=A->rmap->n,n=B->cmap->n;
5368   Mat            Cmat;
5369 
5370   PetscFunctionBegin;
5371   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5372   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5373   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5374   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5375   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5376   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5377   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5378   *C   = Cmat;
5379   PetscFunctionReturn(0);
5380 }
5381 
5382 /* ----------------------------------------------------------------*/
5383 #undef __FUNCT__
5384 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5385 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5386 {
5387   PetscErrorCode ierr;
5388 
5389   PetscFunctionBegin;
5390   if (scall == MAT_INITIAL_MATRIX){
5391     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5392   }
5393   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5394   PetscFunctionReturn(0);
5395 }
5396 
5397 EXTERN_C_BEGIN
5398 #if defined(PETSC_HAVE_MUMPS)
5399 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5400 #endif
5401 #if defined(PETSC_HAVE_PASTIX)
5402 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5403 #endif
5404 #if defined(PETSC_HAVE_SUPERLU_DIST)
5405 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5406 #endif
5407 #if defined(PETSC_HAVE_SPOOLES)
5408 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5409 #endif
5410 EXTERN_C_END
5411 
5412 /*MC
5413    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5414 
5415    Options Database Keys:
5416 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5417 
5418   Level: beginner
5419 
5420 .seealso: MatCreateMPIAIJ()
5421 M*/
5422 
5423 EXTERN_C_BEGIN
5424 #undef __FUNCT__
5425 #define __FUNCT__ "MatCreate_MPIAIJ"
5426 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5427 {
5428   Mat_MPIAIJ     *b;
5429   PetscErrorCode ierr;
5430   PetscMPIInt    size;
5431 
5432   PetscFunctionBegin;
5433   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5434 
5435   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5436   B->data         = (void*)b;
5437   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5438   B->rmap->bs     = 1;
5439   B->assembled    = PETSC_FALSE;
5440 
5441   B->insertmode   = NOT_SET_VALUES;
5442   b->size         = size;
5443   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5444 
5445   /* build cache for off array entries formed */
5446   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5447   b->donotstash  = PETSC_FALSE;
5448   b->colmap      = 0;
5449   b->garray      = 0;
5450   b->roworiented = PETSC_TRUE;
5451 
5452   /* stuff used for matrix vector multiply */
5453   b->lvec      = PETSC_NULL;
5454   b->Mvctx     = PETSC_NULL;
5455 
5456   /* stuff for MatGetRow() */
5457   b->rowindices   = 0;
5458   b->rowvalues    = 0;
5459   b->getrowactive = PETSC_FALSE;
5460 
5461 #if defined(PETSC_HAVE_SPOOLES)
5462   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5463                                      "MatGetFactor_mpiaij_spooles",
5464                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5465 #endif
5466 #if defined(PETSC_HAVE_MUMPS)
5467   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5468                                      "MatGetFactor_aij_mumps",
5469                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5470 #endif
5471 #if defined(PETSC_HAVE_PASTIX)
5472   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5473 					   "MatGetFactor_mpiaij_pastix",
5474 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5475 #endif
5476 #if defined(PETSC_HAVE_SUPERLU_DIST)
5477   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5478                                      "MatGetFactor_mpiaij_superlu_dist",
5479                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5480 #endif
5481   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5482                                      "MatStoreValues_MPIAIJ",
5483                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5484   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5485                                      "MatRetrieveValues_MPIAIJ",
5486                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5487   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5488 				     "MatGetDiagonalBlock_MPIAIJ",
5489                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5491 				     "MatIsTranspose_MPIAIJ",
5492 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5494 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5495 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5497 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5498 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5500 				     "MatDiagonalScaleLocal_MPIAIJ",
5501 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5503                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5504                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5506                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5507                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5509                                      "MatConvert_MPIAIJ_MPISBAIJ",
5510                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5512                                      "MatMatMult_MPIDense_MPIAIJ",
5513                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5515                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5516                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5518                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5519                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5521   PetscFunctionReturn(0);
5522 }
5523 EXTERN_C_END
5524 
5525 #undef __FUNCT__
5526 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5527 /*@
5528      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5529          and "off-diagonal" part of the matrix in CSR format.
5530 
5531    Collective on MPI_Comm
5532 
5533    Input Parameters:
5534 +  comm - MPI communicator
5535 .  m - number of local rows (Cannot be PETSC_DECIDE)
5536 .  n - This value should be the same as the local size used in creating the
5537        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5538        calculated if N is given) For square matrices n is almost always m.
5539 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5540 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5541 .   i - row indices for "diagonal" portion of matrix
5542 .   j - column indices
5543 .   a - matrix values
5544 .   oi - row indices for "off-diagonal" portion of matrix
5545 .   oj - column indices
5546 -   oa - matrix values
5547 
5548    Output Parameter:
5549 .   mat - the matrix
5550 
5551    Level: advanced
5552 
5553    Notes:
5554        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5555        must free the arrays once the matrix has been destroyed and not before.
5556 
5557        The i and j indices are 0 based
5558 
5559        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5560 
5561        This sets local rows and cannot be used to set off-processor values.
5562 
5563        You cannot later use MatSetValues() to change values in this matrix.
5564 
5565 .keywords: matrix, aij, compressed row, sparse, parallel
5566 
5567 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5568           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5569 @*/
5570 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5571 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5572 {
5573   PetscErrorCode ierr;
5574   Mat_MPIAIJ     *maij;
5575 
5576  PetscFunctionBegin;
5577   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5578   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5579   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5580   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5581   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5582   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5583   maij = (Mat_MPIAIJ*) (*mat)->data;
5584   maij->donotstash     = PETSC_TRUE;
5585   (*mat)->preallocated = PETSC_TRUE;
5586 
5587   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5588   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5589   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5590   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5591 
5592   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5593   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5594 
5595   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5596   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5597   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5598   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5599 
5600   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5601   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5602   PetscFunctionReturn(0);
5603 }
5604 
5605 /*
5606     Special version for direct calls from Fortran
5607 */
5608 #include <private/fortranimpl.h>
5609 
5610 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5611 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5612 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5613 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5614 #endif
5615 
5616 /* Change these macros so can be used in void function */
5617 #undef CHKERRQ
5618 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5619 #undef SETERRQ2
5620 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5621 #undef SETERRQ
5622 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5623 
5624 EXTERN_C_BEGIN
5625 #undef __FUNCT__
5626 #define __FUNCT__ "matsetvaluesmpiaij_"
5627 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5628 {
5629   Mat             mat = *mmat;
5630   PetscInt        m = *mm, n = *mn;
5631   InsertMode      addv = *maddv;
5632   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5633   PetscScalar     value;
5634   PetscErrorCode  ierr;
5635 
5636   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5637   if (mat->insertmode == NOT_SET_VALUES) {
5638     mat->insertmode = addv;
5639   }
5640 #if defined(PETSC_USE_DEBUG)
5641   else if (mat->insertmode != addv) {
5642     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5643   }
5644 #endif
5645   {
5646   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5647   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5648   PetscBool       roworiented = aij->roworiented;
5649 
5650   /* Some Variables required in the macro */
5651   Mat             A = aij->A;
5652   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5653   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5654   MatScalar       *aa = a->a;
5655   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5656   Mat             B = aij->B;
5657   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5658   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5659   MatScalar       *ba = b->a;
5660 
5661   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5662   PetscInt        nonew = a->nonew;
5663   MatScalar       *ap1,*ap2;
5664 
5665   PetscFunctionBegin;
5666   for (i=0; i<m; i++) {
5667     if (im[i] < 0) continue;
5668 #if defined(PETSC_USE_DEBUG)
5669     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5670 #endif
5671     if (im[i] >= rstart && im[i] < rend) {
5672       row      = im[i] - rstart;
5673       lastcol1 = -1;
5674       rp1      = aj + ai[row];
5675       ap1      = aa + ai[row];
5676       rmax1    = aimax[row];
5677       nrow1    = ailen[row];
5678       low1     = 0;
5679       high1    = nrow1;
5680       lastcol2 = -1;
5681       rp2      = bj + bi[row];
5682       ap2      = ba + bi[row];
5683       rmax2    = bimax[row];
5684       nrow2    = bilen[row];
5685       low2     = 0;
5686       high2    = nrow2;
5687 
5688       for (j=0; j<n; j++) {
5689         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5690         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5691         if (in[j] >= cstart && in[j] < cend){
5692           col = in[j] - cstart;
5693           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5694         } else if (in[j] < 0) continue;
5695 #if defined(PETSC_USE_DEBUG)
5696         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5697 #endif
5698         else {
5699           if (mat->was_assembled) {
5700             if (!aij->colmap) {
5701               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5702             }
5703 #if defined (PETSC_USE_CTABLE)
5704             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5705 	    col--;
5706 #else
5707             col = aij->colmap[in[j]] - 1;
5708 #endif
5709             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5710               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5711               col =  in[j];
5712               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5713               B = aij->B;
5714               b = (Mat_SeqAIJ*)B->data;
5715               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5716               rp2      = bj + bi[row];
5717               ap2      = ba + bi[row];
5718               rmax2    = bimax[row];
5719               nrow2    = bilen[row];
5720               low2     = 0;
5721               high2    = nrow2;
5722               bm       = aij->B->rmap->n;
5723               ba = b->a;
5724             }
5725           } else col = in[j];
5726           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5727         }
5728       }
5729     } else {
5730       if (!aij->donotstash) {
5731         if (roworiented) {
5732           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5733         } else {
5734           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5735         }
5736       }
5737     }
5738   }}
5739   PetscFunctionReturnVoid();
5740 }
5741 EXTERN_C_END
5742 
5743