#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal nonzeros */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

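/*
   Usage sketch (an illustration only; this is an internal PETSc routine, and
   the names gseq, mloc, and nnz below are hypothetical): every process calls
   collectively with its local row count mloc, but only process 0 needs to
   supply a valid SeqAIJ matrix.

       Mat gseq = PETSC_NULL,mdist;
       if (!rank) {
         ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,0,nnz,&gseq);CHKERRQ(ierr);
         (fill gseq with MatSetValues() and assemble it)
       }
       ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_INITIAL_MATRIX,&mdist);CHKERRQ(ierr);
       (later, to reuse the distributed nonzero pattern with new numerical values)
       ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_REUSE_MATRIX,&mdist);CHKERRQ(ierr);
*/
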
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

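/*
   Worked example of the colmap convention built above (entries are stored
   shifted by +1 so that 0 can mean "this global column is absent from B"):
   if garray = {3,7,12}, i.e. local columns 0,1,2 of the off-diagonal block B
   correspond to global columns 3,7,12, then colmap sends 3 -> 1, 7 -> 2,
   12 -> 3 and every other global column to 0. A lookup is therefore written
   col = aij->colmap[gcol] - 1 (or PetscTableFind(aij->colmap,gcol+1,&col)
   followed by col--), and col < 0 signals that gcol is not present in B.
*/
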
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

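/*
   Both macros above implement the same core operation: a guarded binary search
   over a sorted row followed by insertion with a shift of the tail. A plain C
   sketch of that core (a hedged illustration that ignores the reallocation,
   nonew, and ignorezeroentries handling the macros also perform):

       static void RowInsert(PetscInt *cols,MatScalar *vals,PetscInt *nrow,
                             PetscInt col,MatScalar value,InsertMode addv)
       {
         PetscInt low = 0,high = *nrow,t,i;
         while (high - low > 5) {            (binary search down to a short run)
           t = (low+high)/2;
           if (cols[t] > col) high = t; else low = t;
         }
         for (i=low; i<high; i++) {          (linear scan of the short run)
           if (cols[i] > col) break;
           if (cols[i] == col) {             (existing entry: add or overwrite)
             if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
             return;
           }
         }
         for (t=(*nrow)++; t>i; t--) {       (new entry: shift the tail up one slot)
           cols[t] = cols[t-1]; vals[t] = vals[t-1];
         }
         cols[i] = col; vals[i] = value;
       }
*/
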
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

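/*
   Layout that MatSetValuesRow_MPIAIJ() above assumes for v: the caller passes
   the complete local row with columns in ascending global order, so v splits
   into [B part left of the diagonal block | A (diagonal) part | B part right
   of the diagonal block]. For example, with ownership range [4,8), a row with
   nonzeros in global columns {1,2,5,6,9} arrives as five values in exactly
   that column order: the first l = 2 go into B, the next two (columns 5,6)
   into A, and the last one (column 9) into B after the first l slots.
*/
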
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

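/*
   Worked example of the two value layouts accepted above: for m = 2 rows and
   n = 2 columns, the entry destined for (im[i],in[j]) is read from v[i*n+j]
   when roworiented, so v = {a00,a01,a10,a11}, and from v[i+j*m] otherwise,
   so v = {a00,a10,a01,a11}. Rows owned by another process are not dropped:
   they are packed into mat->stash and communicated during assembly.
*/
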
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

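/*
   Calling sequence these two routines support (a sketch; row, cols, and vals
   are whatever the application generates, including off-process rows):

       ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       (unrelated computation may be done here while stash messages are in flight)
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPIAIJ() ships the stashed off-process entries and
   MatAssemblyEnd_MPIAIJ() unpacks them through MatSetValues_MPIAIJ().
*/
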
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     that would allocate new memory, triggering lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

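/*
   Typical use of the routine above, e.g. imposing Dirichlet boundary
   conditions (a sketch; nbc and bcrows are hypothetical names):

       ierr = MatZeroRows(A,nbc,bcrows,1.0);CHKERRQ(ierr);   (zero the rows, keep 1.0 on the diagonal)

   Each process may list any global rows, not just the ones it owns; the code
   above routes every row index to its owning process first.
*/
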
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

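/*
   MatMult_MPIAIJ() above realizes the block decomposition
        y = A_d x_local + A_o x_ghost
   where A_d (stored in a->A) needs only the locally owned part of x and A_o
   (stored in a->B) needs the ghost values gathered into a->lvec. Starting the
   scatter, multiplying by A_d, and only then finishing the scatter overlaps
   the communication with the local computation.
*/
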
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values were actually received in the Begin() but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr); /* ISCreateGeneral() copied the indices, so release our array */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

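/*
   The binary writer above is reached through the usual viewer interface, e.g.
   (a sketch):

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(mat,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The file then holds the header (MAT_FILE_CLASSID, M, N, total nz), all row
   lengths, all column indices, and all values, each gathered to process 0.
*/
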
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
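/*
   A minimal usage sketch for the routine above (hedged: the vectors are
   hypothetical, and MatSOR() is normally reached through PCSOR rather than
   called directly). omega = 1.0 gives Gauss-Seidel; fshift = 0, its = lits = 1:

     Vec b,x;
     ierr = MatSOR(A,b,1.0,
                   (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,1,1,x);CHKERRQ(ierr);

   Only the local variants (and Eisenstat) are supported here; a true parallel
   sweep falls through to the final SETERRQ() above.
*/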
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is simpler since columns are globally replicated
1341      and need no gather; make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
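/*
   A sketch of driving the routine above (hedged; the index arrays rowidx and
   colidx are hypothetical). Both index sets must be marked as permutations
   before the call, matching what the code above enforces:

     IS rowp,colp;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,rowidx,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,colidx,&colp);CHKERRQ(ierr);
     ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
     ierr = ISSetPermutation(colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/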
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
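/*
   The five statistics are packed into one array so that a single
   MPI_Allreduce() covers them all. A hedged usage sketch:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/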
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SPD:
1442     A->spd_set                         = PETSC_TRUE;
1443     A->spd                             = flg;
1444     if (flg) {
1445       A->symmetric                     = PETSC_TRUE;
1446       A->structurally_symmetric        = PETSC_TRUE;
1447       A->symmetric_set                 = PETSC_TRUE;
1448       A->structurally_symmetric_set    = PETSC_TRUE;
1449     }
1450     break;
1451   case MAT_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_STRUCTURALLY_SYMMETRIC:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_HERMITIAN:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   case MAT_SYMMETRY_ETERNAL:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   default:
1464     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatGetRow_MPIAIJ"
1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1477   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p;
1479 
1480   PetscFunctionBegin;
1481   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1482   mat->getrowactive = PETSC_TRUE;
1483 
1484   if (!mat->rowvalues && (idx || v)) {
1485     /*
1486         allocate enough space to hold information from the longest row.
1487     */
1488     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1489     PetscInt   max = 1,tmp;
1490     for (i=0; i<matin->rmap->n; i++) {
1491       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1492       if (max < tmp) { max = tmp; }
1493     }
1494     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1495   }
1496 
1497   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1498   lrow = row - rstart;
1499 
1500   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1501   if (!v)   {pvA = 0; pvB = 0;}
1502   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1503   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1504   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1505   nztot = nzA + nzB;
1506 
1507   cmap  = mat->garray;
1508   if (v  || idx) {
1509     if (nztot) {
1510       /* Sort by increasing column numbers, assuming A and B already sorted */
1511       PetscInt imark = -1;
1512       if (v) {
1513         *v = v_p = mat->rowvalues;
1514         for (i=0; i<nzB; i++) {
1515           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1516           else break;
1517         }
1518         imark = i;
1519         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1520         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1521       }
1522       if (idx) {
1523         *idx = idx_p = mat->rowindices;
1524         if (imark > -1) {
1525           for (i=0; i<imark; i++) {
1526             idx_p[i] = cmap[cworkB[i]];
1527           }
1528         } else {
1529           for (i=0; i<nzB; i++) {
1530             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1531             else break;
1532           }
1533           imark = i;
1534         }
1535         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1536         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1537       }
1538     } else {
1539       if (idx) *idx = 0;
1540       if (v)   *v   = 0;
1541     }
1542   }
1543   *nz = nztot;
1544   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1545   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
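/*
   The merged row is assembled in mat->rowvalues/mat->rowindices, so only one
   row may be outstanding at a time; MatRestoreRow() must be called before the
   next MatGetRow(). A hedged sketch iterating over the local rows:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/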
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1554 
1555   PetscFunctionBegin;
1556   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
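/*
   Each supported norm above costs exactly one reduction: MPI_SUM of the
   squared entries for NORM_FROBENIUS (then a square root), MPI_SUM of
   per-column absolute sums followed by a local max for NORM_1, and MPI_MAX of
   per-row absolute sums for NORM_INFINITY. E.g.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);

   NORM_2 would require an iterative estimate and is not supported here.
*/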
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
1683   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
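/*
   Reuse semantics of the routine above: MAT_INITIAL_MATRIX creates B and
   returns it; in-place transpose (reuse with *matout == A, square matrices
   only) builds B and then merges its header into A via MatHeaderMerge().
   A hedged sketch:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);  out of place
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     in place
*/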
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1760 {
1761   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIAIJ"
1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1772 {
1773   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1774   Mat            a,b,c,d;
1775   PetscTruth     flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatCopy_MPIAIJ"
1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1793 {
1794   PetscErrorCode ierr;
1795   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1796   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     /* Because of the column compression in the off-processor part a->B,
1802        the number of columns in a->B and b->B may differ, so MatCopy() cannot
1803        be called directly on the two parts. If need be, a copy more efficient
1804        than MatCopy_Basic() could be provided by first uncompressing the B
1805        matrices and then copying the submatrices. */
1806     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1809     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
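/*
   With SAME_NONZERO_PATTERN the two value arrays line up entry for entry, so
   Y += a*X above is literally one BLAS axpy per block (diagonal A and
   off-diagonal B). With SUBSET_NONZERO_PATTERN an x-to-y index map is built
   once, cached on y (y->xtoy/y->XtoY), and reused on later calls; anything
   else falls back to the generic MatAXPY_Basic(). E.g.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   Y = Y + 2*X
*/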
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
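/*
   The pattern above saves the matrix's original destroy routine inside the
   Mat_Redundant struct and installs MatDestroy_MatRedundant() in its place;
   on destruction the hook restores the saved pointer, detaches the container,
   and only then runs the real destroy. This is how the send and receive
   buffers survive between MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX calls
   without changing the Mat interface.
*/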
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
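/*
   A hedged usage sketch for the routine above (the subcommunicator setup is
   schematic; in practice PCREDUNDANT arranges it): every process of each
   subcommunicator ends up owning mlocal_sub rows of a complete copy of mat.

     Mat C = PETSC_NULL;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,
                                  MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... later, after mat's values change but not its nonzero pattern ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,
                                  MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/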
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*119*/0,
2719        0,
2720        0,
2721        0
2722 };
2723 
2724 /* ----------------------------------------------------------------------------------------*/
2725 
2726 EXTERN_C_BEGIN
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 EXTERN_C_END
2740 
2741 EXTERN_C_BEGIN
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2745 {
2746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2751   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 EXTERN_C_END
2755 
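/*
   A hedged usage sketch (mat is assumed to be an assembled MPIAIJ matrix):
   the store/retrieve pair below snapshots and restores the numerical values
   of both the diagonal (A) and off-diagonal (B) blocks, which is useful when
   a matrix with a fixed nonzero pattern is refilled repeatedly.

     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... operations that overwrite the numerical values of mat ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/
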
2756 EXTERN_C_BEGIN
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763   PetscInt       i;
2764 
2765   PetscFunctionBegin;
2766   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2767   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2768   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2769   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2770 
2771   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2773   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2775   if (d_nnz) {
2776     for (i=0; i<B->rmap->n; i++) {
2777       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2778     }
2779   }
2780   if (o_nnz) {
2781     for (i=0; i<B->rmap->n; i++) {
2782       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2783     }
2784   }
2785   b = (Mat_MPIAIJ*)B->data;
2786 
2787   if (!B->preallocated) {
2788     /* Explicitly create 2 MATSEQAIJ matrices. */
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2791     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2795     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2796     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2797   }
2798 
2799   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2800   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2801   B->preallocated = PETSC_TRUE;
2802   PetscFunctionReturn(0);
2803 }
2804 EXTERN_C_END
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2809 {
2810   Mat            mat;
2811   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   *newmat       = 0;
2816   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2817   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2818   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2819   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2820   a    = (Mat_MPIAIJ*)mat->data;
2821 
2822   mat->factortype    = matin->factortype;
2823   mat->rmap->bs      = matin->rmap->bs;
2824   mat->assembled    = PETSC_TRUE;
2825   mat->insertmode   = NOT_SET_VALUES;
2826   mat->preallocated = PETSC_TRUE;
2827 
2828   a->size           = oldmat->size;
2829   a->rank           = oldmat->rank;
2830   a->donotstash     = oldmat->donotstash;
2831   a->roworiented    = oldmat->roworiented;
2832   a->rowindices     = 0;
2833   a->rowvalues      = 0;
2834   a->getrowactive   = PETSC_FALSE;
2835 
2836   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2838 
2839   if (oldmat->colmap) {
2840 #if defined (PETSC_USE_CTABLE)
2841     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2842 #else
2843     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2845     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2846 #endif
2847   } else a->colmap = 0;
2848   if (oldmat->garray) {
2849     PetscInt len;
2850     len  = oldmat->B->cmap->n;
2851     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2852     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2853     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2854   } else a->garray = 0;
2855 
2856   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2858   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2860   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2862   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2863   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2864   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2865   *newmat = mat;
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 /*
2870     Allows sending/receiving larger messages than 2 gigabytes in a single call
2871 */
2872 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2873 {
2874   int             ierr;
2875   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2876   PetscInt        i,numchunks;
2877   PetscMPIInt     icnt;
2878 
2879   numchunks = cnt/CHUNKSIZE + 1;
2880   for (i=0; i<numchunks; i++) {
2881     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2882     ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
2883     if (type == MPIU_INT) {
2884       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2885     } else if (type == MPIU_SCALAR) {
2886       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2887     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2888   }
2889   return 0;
2890 }
2891 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2892 {
2893   int             ierr;
2894   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2895   MPI_Status      status;
2896   PetscInt        i,numchunks;
2897   PetscMPIInt     icnt;
2898 
2899   numchunks = cnt/CHUNKSIZE + 1;
2900   for (i=0; i<numchunks; i++) {
2901     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2902     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
2903     if (type == MPIU_INT) {
2904       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2905     } else if (type == MPIU_SCALAR) {
2906       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2907     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2908   }
2909   return 0;
2910 }
2911 
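/*
   A hedged usage sketch of the helpers above (buffer and count names are
   illustrative only): rank 0 streams a buffer whose element count may exceed
   the PetscMPIInt (int) range to rank 1; both helpers internally split the
   transfer into chunks of at most CHUNKSIZE elements.

     if (!rank) {
       ierr = MPILong_Send(bigbuf,bigcnt,MPIU_SCALAR,1,tag,comm);CHKERRQ(ierr);
     } else if (rank == 1) {
       ierr = MPILong_Recv(bigbuf,bigcnt,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
     }
*/
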
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatLoad_MPIAIJ"
2914 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2915 {
2916   PetscScalar    *vals,*svals;
2917   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2918   PetscErrorCode ierr;
2919   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2920   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2921   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2922   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2923   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2924   int            fd;
2925 
2926   PetscFunctionBegin;
2927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2928   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2929   if (!rank) {
2930     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2931     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2932     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2933   }
2934 
2935   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2936 
2937   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2938   M = header[1]; N = header[2];
2939   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2940   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2941   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2942 
2943   /* If global sizes are set, check if they are consistent with that given in the file */
2944   if (sizesset) {
2945     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2946   }
2947   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2948   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2949 
2950   /* determine ownership of all rows */
2951   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2952   else m = newMat->rmap->n;                                  /* set by user */
2953 
2954   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2955   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2956 
2957   /* First process needs enough room for process with most rows */
2958   if (!rank) {
2959     mmax       = rowners[1];
2960     for (i=2; i<=size; i++) {
2961       mmax = PetscMax(mmax,rowners[i]);
2962     }
2963   } else mmax = m;
2964 
2965   rowners[0] = 0;
2966   for (i=2; i<=size; i++) {
2967     rowners[i] += rowners[i-1];
2968   }
2969   rstart = rowners[rank];
2970   rend   = rowners[rank+1];
2971 
2972   /* distribute row lengths to all processors */
2973   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2974   if (!rank) {
2975     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2976     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2977     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2978     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2979     for (j=0; j<m; j++) {
2980       procsnz[0] += ourlens[j];
2981     }
2982     for (i=1; i<size; i++) {
2983       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2984       /* calculate the number of nonzeros on each processor */
2985       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2986         procsnz[i] += rowlengths[j];
2987       }
2988       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2989     }
2990     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2991   } else {
2992     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2993   }
2994 
2995   if (!rank) {
2996     /* determine max buffer needed and allocate it */
2997     maxnz = 0;
2998     for (i=0; i<size; i++) {
2999       maxnz = PetscMax(maxnz,procsnz[i]);
3000     }
3001     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix column indices  */
3004     nz   = procsnz[0];
3005     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3006     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3007 
3008     /* read in everyone else's parts and ship them off */
3009     for (i=1; i<size; i++) {
3010       nz     = procsnz[i];
3011       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3012       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3013     }
3014     ierr = PetscFree(cols);CHKERRQ(ierr);
3015   } else {
3016     /* determine buffer space needed for message */
3017     nz = 0;
3018     for (i=0; i<m; i++) {
3019       nz += ourlens[i];
3020     }
3021     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3022 
3023     /* receive message of column indices */
3024     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3025   }
3026 
3027   /* determine column ownership if matrix is not square */
3028   if (N != M) {
3029     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3030     else n = newMat->cmap->n;
3031     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3032     cstart = cend - n;
3033   } else {
3034     cstart = rstart;
3035     cend   = rend;
3036     n      = cend - cstart;
3037   }
3038 
3039   /* loop over local rows, determining number of off diagonal entries */
3040   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3041   jj = 0;
3042   for (i=0; i<m; i++) {
3043     for (j=0; j<ourlens[i]; j++) {
3044       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3045       jj++;
3046     }
3047   }
3048 
3049   for (i=0; i<m; i++) {
3050     ourlens[i] -= offlens[i];
3051   }
3052   if (!sizesset) {
3053     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3054   }
3055   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3056 
3057   for (i=0; i<m; i++) {
3058     ourlens[i] += offlens[i];
3059   }
3060 
3061   if (!rank) {
3062     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3063 
3064     /* read in my part of the matrix numerical values  */
3065     nz   = procsnz[0];
3066     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3067 
3068     /* insert into matrix */
3069     jj      = rstart;
3070     smycols = mycols;
3071     svals   = vals;
3072     for (i=0; i<m; i++) {
3073       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3074       smycols += ourlens[i];
3075       svals   += ourlens[i];
3076       jj++;
3077     }
3078 
3079     /* read in other processors and ship out */
3080     for (i=1; i<size; i++) {
3081       nz     = procsnz[i];
3082       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3083       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3084     }
3085     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3086   } else {
3087     /* receive numeric values */
3088     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3089 
3090     /* receive message of values */
3091     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3092 
3093     /* insert into matrix */
3094     jj      = rstart;
3095     smycols = mycols;
3096     svals   = vals;
3097     for (i=0; i<m; i++) {
3098       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3099       smycols += ourlens[i];
3100       svals   += ourlens[i];
3101       jj++;
3102     }
3103   }
3104   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3105   ierr = PetscFree(vals);CHKERRQ(ierr);
3106   ierr = PetscFree(mycols);CHKERRQ(ierr);
3107   ierr = PetscFree(rowners);CHKERRQ(ierr);
3108 
3109   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3116 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3117 {
3118   PetscErrorCode ierr;
3119   IS             iscol_local;
3120   PetscInt       csize;
3121 
3122   PetscFunctionBegin;
3123   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3124   if (call == MAT_REUSE_MATRIX) {
3125     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3126     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3127   } else {
3128     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3129   }
3130   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3131   if (call == MAT_INITIAL_MATRIX) {
3132     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3133     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3134   }
3135   PetscFunctionReturn(0);
3136 }
3137 
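/*
   A hedged usage sketch of the reuse path implemented above (the calling
   convention of the public MatGetSubMatrix() is assumed): the first call with
   MAT_INITIAL_MATRIX composes the gathered column IS on the submatrix under
   the key "ISAllGather", which a later MAT_REUSE_MATRIX call retrieves.

     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... refill mat with new values, keeping the same layout ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
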
3138 #undef __FUNCT__
3139 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3140 /*
3141     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
3142   on each process, and then the end result formed by concatenating the local
3143   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3144 
3145   Note: This requires a sequential iscol with all indices.
3146 */
3147 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3148 {
3149   PetscErrorCode ierr;
3150   PetscMPIInt    rank,size;
3151   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3152   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3153   Mat            *local,M,Mreuse;
3154   MatScalar      *vwork,*aa;
3155   MPI_Comm       comm = ((PetscObject)mat)->comm;
3156   Mat_SeqAIJ     *aij;
3157 
3158 
3159   PetscFunctionBegin;
3160   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3161   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3162 
3163   if (call ==  MAT_REUSE_MATRIX) {
3164     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3165     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3166     local = &Mreuse;
3167     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3168   } else {
3169     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3170     Mreuse = *local;
3171     ierr   = PetscFree(local);CHKERRQ(ierr);
3172   }
3173 
3174   /*
3175       m - number of local rows
3176       n - number of columns (same on all processors)
3177       rstart - first row in new global matrix generated
3178   */
3179   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3180   if (call == MAT_INITIAL_MATRIX) {
3181     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3182     ii  = aij->i;
3183     jj  = aij->j;
3184 
3185     /*
3186         Determine the number of non-zeros in the diagonal and off-diagonal
3187         portions of the matrix in order to do correct preallocation
3188     */
3189 
3190     /* first get start and end of "diagonal" columns */
3191     if (csize == PETSC_DECIDE) {
3192       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3193       if (mglobal == n) { /* square matrix */
3194 	nlocal = m;
3195       } else {
3196         nlocal = n/size + ((n % size) > rank);
3197       }
3198     } else {
3199       nlocal = csize;
3200     }
3201     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3202     rstart = rend - nlocal;
3203     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3204 
3205     /* next, compute all the lengths */
3206     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3207     olens = dlens + m;
3208     for (i=0; i<m; i++) {
3209       jend = ii[i+1] - ii[i];
3210       olen = 0;
3211       dlen = 0;
3212       for (j=0; j<jend; j++) {
3213         if (*jj < rstart || *jj >= rend) olen++;
3214         else dlen++;
3215         jj++;
3216       }
3217       olens[i] = olen;
3218       dlens[i] = dlen;
3219     }
3220     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3221     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3222     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3223     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3224     ierr = PetscFree(dlens);CHKERRQ(ierr);
3225   } else {
3226     PetscInt ml,nl;
3227 
3228     M = *newmat;
3229     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3230     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3231     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3232     /*
3233          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3234        rather than the slower MatSetValues().
3235     */
3236     M->was_assembled = PETSC_TRUE;
3237     M->assembled     = PETSC_FALSE;
3238   }
3239   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3240   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3241   ii  = aij->i;
3242   jj  = aij->j;
3243   aa  = aij->a;
3244   for (i=0; i<m; i++) {
3245     row   = rstart + i;
3246     nz    = ii[i+1] - ii[i];
3247     cwork = jj;     jj += nz;
3248     vwork = aa;     aa += nz;
3249     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3250   }
3251 
3252   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254   *newmat = M;
3255 
3256   /* save submatrix used in processor for next request */
3257   if (call ==  MAT_INITIAL_MATRIX) {
3258     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3259     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3260   }
3261 
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 EXTERN_C_BEGIN
3266 #undef __FUNCT__
3267 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3268 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3269 {
3270   PetscInt       m,cstart, cend,j,nnz,i,d;
3271   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3272   const PetscInt *JJ;
3273   PetscScalar    *values;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3278 
3279   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3280   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3281   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3282   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3283   m      = B->rmap->n;
3284   cstart = B->cmap->rstart;
3285   cend   = B->cmap->rend;
3286   rstart = B->rmap->rstart;
3287 
3288   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3289 
3290 #if defined(PETSC_USE_DEBUG)
3291   for (i=0; i<m; i++) {
3292     nnz     = Ii[i+1]- Ii[i];
3293     JJ      = J + Ii[i];
3294     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3295     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3296     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3297   }
3298 #endif
3299 
3300   for (i=0; i<m; i++) {
3301     nnz     = Ii[i+1]- Ii[i];
3302     JJ      = J + Ii[i];
3303     nnz_max = PetscMax(nnz_max,nnz);
3304     d       = 0;
3305     for (j=0; j<nnz; j++) {
3306       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3307     }
3308     d_nnz[i] = d;
3309     o_nnz[i] = nnz - d;
3310   }
3311   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3312   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3313 
3314   if (v) values = (PetscScalar*)v;
3315   else {
3316     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3317     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3318   }
3319 
3320   for (i=0; i<m; i++) {
3321     ii   = i + rstart;
3322     nnz  = Ii[i+1]- Ii[i];
3323     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3324   }
3325   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327 
3328   if (!v) {
3329     ierr = PetscFree(values);CHKERRQ(ierr);
3330   }
3331   PetscFunctionReturn(0);
3332 }
3333 EXTERN_C_END
3334 
3335 #undef __FUNCT__
3336 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3337 /*@
3338    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3339    (the default parallel PETSc format).
3340 
3341    Collective on MPI_Comm
3342 
3343    Input Parameters:
3344 +  B - the matrix
3345 .  i - the indices into j for the start of each local row (starts with zero)
3346 .  j - the column indices for each local row (starts with zero)
3347 -  v - optional values in the matrix
3348 
3349    Level: developer
3350 
3351    Notes:
3352        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3353      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3354      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3355 
3356        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3357 
3358        The format used for the sparse matrix input is equivalent to a
3359     row-major ordering, i.e., for the following matrix, the input data expected is
3360     as shown:
3361 
3362         1 0 0
3363         2 0 3     P0
3364        -------
3365         4 5 6     P1
3366 
3367      Process0 [P0]: rows_owned=[0,1]
3368         i =  {0,1,3}  [size = nrow+1  = 2+1]
3369         j =  {0,0,2}  [size = nz = 3]
3370         v =  {1,2,3}  [size = nz = 3]
3371 
3372      Process1 [P1]: rows_owned=[2]
3373         i =  {0,3}    [size = nrow+1  = 1+1]
3374         j =  {0,1,2}  [size = nz = 3]
3375         v =  {4,5,6}  [size = nz = 3]
3376 
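     A minimal call-sequence sketch (hedged: i, j, and v are the per-process CSR
     arrays laid out as above; m, n, M, N are the usual local/global sizes):

.vb
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
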
3377 .keywords: matrix, aij, compressed row, sparse, parallel
3378 
3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3380           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3381 @*/
3382 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3383 {
3384   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3385 
3386   PetscFunctionBegin;
3387   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3388   if (f) {
3389     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3390   }
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 #undef __FUNCT__
3395 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3396 /*@C
3397    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3398    (the default parallel PETSc format).  For good matrix assembly performance
3399    the user should preallocate the matrix storage by setting the parameters
3400    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3401    performance can be increased by more than a factor of 50.
3402 
3403    Collective on MPI_Comm
3404 
3405    Input Parameters:
3406 +  A - the matrix
3407 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3408            (same value is used for all local rows)
3409 .  d_nnz - array containing the number of nonzeros in the various rows of the
3410            DIAGONAL portion of the local submatrix (possibly different for each row)
3411            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3412            The size of this array is equal to the number of local rows, i.e 'm'.
3413            You must leave room for the diagonal entry even if it is zero.
3414 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3415            submatrix (same value is used for all local rows).
3416 -  o_nnz - array containing the number of nonzeros in the various rows of the
3417            OFF-DIAGONAL portion of the local submatrix (possibly different for
3418            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3419            structure. The size of this array is equal to the number
3420            of local rows, i.e 'm'.
3421 
3422    If the *_nnz parameter is given then the *_nz parameter is ignored
3423 
3424    The AIJ format (also called the Yale sparse matrix format or
3425    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3426    storage.  The stored row and column indices begin with zero.
3427    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3428 
3429    The parallel matrix is partitioned such that the first m0 rows belong to
3430    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3431    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3432 
3433    The DIAGONAL portion of the local submatrix of a processor can be defined
3434    as the submatrix which is obtained by extracting the part corresponding to
3435    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3436    first row that belongs to the processor, r2 is the last row belonging to
3437    this processor, and c1-c2 is the range of indices of the local part of a
3438    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3439    common case of a square matrix, the row and column ranges are the same and
3440    the DIAGONAL part is also square. The remaining portion of the local
3441    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3442 
3443    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3444 
3445    You can call MatGetInfo() to get information on how effective the preallocation was;
3446    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3447    You can also run with the option -info and look for messages with the string
3448    malloc in them to see if additional memory allocation was needed.
3449 
3450    Example usage:
3451 
3452    Consider the following 8x8 matrix with 34 non-zero values that is
3453    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3454    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3455    as follows:
3456 
3457 .vb
3458             1  2  0  |  0  3  0  |  0  4
3459     Proc0   0  5  6  |  7  0  0  |  8  0
3460             9  0 10  | 11  0  0  | 12  0
3461     -------------------------------------
3462            13  0 14  | 15 16 17  |  0  0
3463     Proc1   0 18  0  | 19 20 21  |  0  0
3464             0  0  0  | 22 23  0  | 24  0
3465     -------------------------------------
3466     Proc2  25 26 27  |  0  0 28  | 29  0
3467            30  0  0  | 31 32 33  |  0 34
3468 .ve
3469 
3470    This can be represented as a collection of submatrices as:
3471 
3472 .vb
3473       A B C
3474       D E F
3475       G H I
3476 .ve
3477 
3478    Where the submatrices A,B,C are owned by proc0, D,E,F are
3479    owned by proc1, G,H,I are owned by proc2.
3480 
3481    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3482    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3483    The 'M','N' parameters are 8,8, and have the same values on all procs.
3484 
3485    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3486    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3487    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3488    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3489    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3490    matrix and [DF] as another SeqAIJ matrix.
3491 
3492    When d_nz, o_nz parameters are specified, d_nz storage elements are
3493    allocated for every row of the local diagonal submatrix, and o_nz
3494    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3495    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3496    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3497    In this case, the values of d_nz,o_nz are:
3498 .vb
3499      proc0 : dnz = 2, o_nz = 2
3500      proc1 : dnz = 3, o_nz = 2
3501      proc2 : dnz = 1, o_nz = 4
3502 .ve
3503    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3504    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3505    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3506    34 values.
3507 
3508    When d_nnz, o_nnz parameters are specified, the storage is specified
3509    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3510    In the above case the values for d_nnz,o_nnz are:
3511 .vb
3512      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3513      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3514      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3515 .ve
3516    Here the space allocated is the sum of all the above values, i.e., 34, and
3517    hence pre-allocation is perfect.
3518 
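   For example, a hedged sketch of the corresponding call on proc1, using the
   per-row arrays listed above:

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
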
3519    Level: intermediate
3520 
3521 .keywords: matrix, aij, compressed row, sparse, parallel
3522 
3523 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3524           MPIAIJ, MatGetInfo()
3525 @*/
3526 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3527 {
3528   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3529 
3530   PetscFunctionBegin;
3531   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3532   if (f) {
3533     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3534   }
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 #undef __FUNCT__
3539 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3540 /*@
3541      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3542          CSR format the local rows.
3543 
3544    Collective on MPI_Comm
3545 
3546    Input Parameters:
3547 +  comm - MPI communicator
3548 .  m - number of local rows (Cannot be PETSC_DECIDE)
3549 .  n - This value should be the same as the local size used in creating the
3550        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3551        calculated if N is given) For square matrices n is almost always m.
3552 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3553 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3554 .   i - row indices
3555 .   j - column indices
3556 -   a - matrix values
3557 
3558    Output Parameter:
3559 .   mat - the matrix
3560 
3561    Level: intermediate
3562 
3563    Notes:
3564        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3565      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3566      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3567 
3568        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3569 
3570        The format used for the sparse matrix input is equivalent to a
3571     row-major ordering, i.e., for the following matrix, the input data expected is
3572     as shown:
3573 
3574         1 0 0
3575         2 0 3     P0
3576        -------
3577         4 5 6     P1
3578 
3579      Process0 [P0]: rows_owned=[0,1]
3580         i =  {0,1,3}  [size = nrow+1  = 2+1]
3581         j =  {0,0,2}  [size = nz = 3]
3582         v =  {1,2,3}  [size = nz = 3]
3583 
3584      Process1 [P1]: rows_owned=[2]
3585         i =  {0,3}    [size = nrow+1  = 1+1]
3586         j =  {0,1,2}  [size = nz = 3]
3587         v =  {4,5,6}  [size = nz = 3]
3588 
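     A hedged sketch of the corresponding call (per-process arrays as above, and
     the global sizes left for PETSc to determine):

.vb
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);CHKERRQ(ierr);
.ve
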
3589 .keywords: matrix, aij, compressed row, sparse, parallel
3590 
3591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3592           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3593 @*/
3594 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3595 {
3596   PetscErrorCode ierr;
3597 
3598   PetscFunctionBegin;
3599   if (i[0]) {
3600     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3601   }
3602   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3603   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3604   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3605   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3606   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3607   PetscFunctionReturn(0);
3608 }
3609 
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatCreateMPIAIJ"
3612 /*@C
3613    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3614    (the default parallel PETSc format).  For good matrix assembly performance
3615    the user should preallocate the matrix storage by setting the parameters
3616    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3617    performance can be increased by more than a factor of 50.
3618 
3619    Collective on MPI_Comm
3620 
3621    Input Parameters:
3622 +  comm - MPI communicator
3623 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3624            This value should be the same as the local size used in creating the
3625            y vector for the matrix-vector product y = Ax.
3626 .  n - This value should be the same as the local size used in creating the
3627        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3628        calculated if N is given) For square matrices n is almost always m.
3629 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3630 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3631 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3632            (same value is used for all local rows)
3633 .  d_nnz - array containing the number of nonzeros in the various rows of the
3634            DIAGONAL portion of the local submatrix (possibly different for each row)
3635            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3636            The size of this array is equal to the number of local rows, i.e 'm'.
3637            You must leave room for the diagonal entry even if it is zero.
3638 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3639            submatrix (same value is used for all local rows).
3640 -  o_nnz - array containing the number of nonzeros in the various rows of the
3641            OFF-DIAGONAL portion of the local submatrix (possibly different for
3642            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3643            structure. The size of this array is equal to the number
3644            of local rows, i.e 'm'.
3645 
3646    Output Parameter:
3647 .  A - the matrix
3648 
3649    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3650    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3651    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3652 
3653    Notes:
3654    If the *_nnz parameter is given then the *_nz parameter is ignored
3655 
3656    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3657    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3658    storage requirements for this matrix.
3659 
3660    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3661    processor then it must be used on all processors that share the object for
3662    that argument.
3663 
3664    The user MUST specify either the local or global matrix dimensions
3665    (possibly both).
3666 
3667    The parallel matrix is partitioned across processors such that the
3668    first m0 rows belong to process 0, the next m1 rows belong to
3669    process 1, the next m2 rows belong to process 2, etc., where
3670    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3671    values corresponding to an [m x N] submatrix.
3672 
3673    The columns are logically partitioned with the n0 columns belonging
3674    to 0th partition, the next n1 columns belonging to the next
3675    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3676 
3677    The DIAGONAL portion of the local submatrix on any given processor
3678    is the submatrix corresponding to the rows and columns m,n
3679    owned by the given processor, i.e., the diagonal matrix on
3680    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3681    etc. The remaining portion of the local submatrix [m x (N-n)]
3682    constitute the OFF-DIAGONAL portion. The example below better
3683    illustrates this concept.
3684 
3685    For a square global matrix we define each processor's diagonal portion
3686    to be its local rows and the corresponding columns (a square submatrix);
3687    each processor's off-diagonal portion encompasses the remainder of the
3688    local matrix (a rectangular submatrix).
3689 
3690    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3691 
3692    When calling this routine with a single process communicator, a matrix of
3693    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3694    type of communicator, use the construction mechanism:
3695      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3696 
3697    By default, this format uses inodes (identical nodes) when possible.
3698    We search for consecutive rows with the same nonzero structure, thereby
3699    reusing matrix information to achieve increased efficiency.
3700 
3701    Options Database Keys:
3702 +  -mat_no_inode  - Do not use inodes
3703 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3704 -  -mat_aij_oneindex - Internally use indexing starting at 1
3705         rather than 0.  Note that when calling MatSetValues(),
3706         the user still MUST index entries starting at 0!
3707 
3708 
3709    Example usage:
3710 
3711    Consider the following 8x8 matrix with 34 non-zero values that is
3712    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3713    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3714    as follows:
3715 
3716 .vb
3717             1  2  0  |  0  3  0  |  0  4
3718     Proc0   0  5  6  |  7  0  0  |  8  0
3719             9  0 10  | 11  0  0  | 12  0
3720     -------------------------------------
3721            13  0 14  | 15 16 17  |  0  0
3722     Proc1   0 18  0  | 19 20 21  |  0  0
3723             0  0  0  | 22 23  0  | 24  0
3724     -------------------------------------
3725     Proc2  25 26 27  |  0  0 28  | 29  0
3726            30  0  0  | 31 32 33  |  0 34
3727 .ve
3728 
3729    This can be represented as a collection of submatrices as:
3730 
3731 .vb
3732       A B C
3733       D E F
3734       G H I
3735 .ve
3736 
3737    Where the submatrices A,B,C are owned by proc0, D,E,F are
3738    owned by proc1, G,H,I are owned by proc2.
3739 
3740    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3741    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3742    The 'M','N' parameters are 8,8, and have the same values on all procs.
3743 
3744    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3745    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3746    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3747    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3748    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3749    matrix and [DF] as another SeqAIJ matrix.
3750 
3751    When d_nz, o_nz parameters are specified, d_nz storage elements are
3752    allocated for every row of the local diagonal submatrix, and o_nz
3753    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3754    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3755    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3756    In this case, the values of d_nz,o_nz are:
3757 .vb
3758      proc0 : dnz = 2, o_nz = 2
3759      proc1 : dnz = 3, o_nz = 2
3760      proc2 : dnz = 1, o_nz = 4
3761 .ve
3762    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3763    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3764    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3765    34 values.
3766 
3767    When d_nnz, o_nnz parameters are specified, the storage is specified
3768    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3769    In the above case the values for d_nnz,o_nnz are:
3770 .vb
3771      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3772      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3773      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3774 .ve
3775    Here the space allocated is the sum of all the above values, i.e., 34, and
3776    hence pre-allocation is perfect.
3777 
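   For example, a hedged sketch of the call on proc0 for the 8x8 matrix above
   (m=n=3 locally, M=N=8 globally, per-row counts as listed):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
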
3778    Level: intermediate
3779 
3780 .keywords: matrix, aij, compressed row, sparse, parallel
3781 
3782 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3783           MPIAIJ, MatCreateMPIAIJWithArrays()
3784 @*/
3785 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3786 {
3787   PetscErrorCode ierr;
3788   PetscMPIInt    size;
3789 
3790   PetscFunctionBegin;
3791   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3792   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3793   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3794   if (size > 1) {
3795     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3796     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3797   } else {
3798     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3799     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3800   }
3801   PetscFunctionReturn(0);
3802 }
3803 
3804 #undef __FUNCT__
3805 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3806 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3807 {
3808   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3809 
3810   PetscFunctionBegin;
3811   *Ad     = a->A;
3812   *Ao     = a->B;
3813   *colmap = a->garray;
3814   PetscFunctionReturn(0);
3815 }
3816 
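/*
   A hedged usage sketch: retrieve the local diagonal block Ad, the
   off-diagonal block Ao, and the mapping (garray) from Ao's local column
   numbers to global column numbers.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/
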
3817 #undef __FUNCT__
3818 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3819 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3820 {
3821   PetscErrorCode ierr;
3822   PetscInt       i;
3823   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3824 
3825   PetscFunctionBegin;
3826   if (coloring->ctype == IS_COLORING_GLOBAL) {
3827     ISColoringValue *allcolors,*colors;
3828     ISColoring      ocoloring;
3829 
3830     /* set coloring for diagonal portion */
3831     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3832 
3833     /* set coloring for off-diagonal portion */
3834     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3835     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3836     for (i=0; i<a->B->cmap->n; i++) {
3837       colors[i] = allcolors[a->garray[i]];
3838     }
3839     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3840     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3841     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3842     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3843   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3844     ISColoringValue *colors;
3845     PetscInt        *larray;
3846     ISColoring      ocoloring;
3847 
3848     /* set coloring for diagonal portion */
3849     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3850     for (i=0; i<a->A->cmap->n; i++) {
3851       larray[i] = i + A->cmap->rstart;
3852     }
3853     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3854     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3855     for (i=0; i<a->A->cmap->n; i++) {
3856       colors[i] = coloring->colors[larray[i]];
3857     }
3858     ierr = PetscFree(larray);CHKERRQ(ierr);
3859     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3860     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3861     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3862 
3863     /* set coloring for off-diagonal portion */
3864     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3865     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3866     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3867     for (i=0; i<a->B->cmap->n; i++) {
3868       colors[i] = coloring->colors[larray[i]];
3869     }
3870     ierr = PetscFree(larray);CHKERRQ(ierr);
3871     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3872     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3873     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3874   } else {
3875     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3876   }
3877 
3878   PetscFunctionReturn(0);
3879 }
3880 
3881 #if defined(PETSC_HAVE_ADIC)
3882 #undef __FUNCT__
3883 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3884 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3885 {
3886   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3887   PetscErrorCode ierr;
3888 
3889   PetscFunctionBegin;
3890   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3891   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3892   PetscFunctionReturn(0);
3893 }
3894 #endif
3895 
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3898 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3899 {
3900   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3901   PetscErrorCode ierr;
3902 
3903   PetscFunctionBegin;
3904   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3905   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 #undef __FUNCT__
3910 #define __FUNCT__ "MatMerge"
3911 /*@
3912       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3913                  matrices from each processor
3914 
3915     Collective on MPI_Comm
3916 
3917    Input Parameters:
3918 +    comm - the communicator the parallel matrix will live on
3919 .    inmat - the input sequential matrix (one per process; destroyed by this routine)
3920 .    n - number of local columns (or PETSC_DECIDE)
3921 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3922 
3923    Output Parameter:
3924 .    outmat - the parallel matrix generated
3925 
3926     Level: advanced
3927 
3928    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3929 
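   A hedged usage sketch (localmat is this process's sequential matrix; note that
   the routine destroys the input matrix, as the MatDestroy(inmat) call in the
   body below shows):

.vb
     ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve
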
3930 @*/
3931 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3932 {
3933   PetscErrorCode ierr;
3934   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3935   PetscInt       *indx;
3936   PetscScalar    *values;
3937 
3938   PetscFunctionBegin;
3939   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3940   if (scall == MAT_INITIAL_MATRIX){
3941     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3942     if (n == PETSC_DECIDE){
3943       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3944     }
3945     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3946     rstart -= m;
3947 
3948     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3949     for (i=0;i<m;i++) {
3950       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3951       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3952       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3953     }
3954     /* This routine will ONLY return MPIAIJ type matrix */
3955     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3956     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3957     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3958     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3959     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3960 
3961   } else if (scall == MAT_REUSE_MATRIX){
3962     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3963   } else {
3964     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3965   }
3966 
3967   for (i=0;i<m;i++) {
3968     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3969     Ii    = i + rstart;
3970     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3971     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3972   }
3973   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3974   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976 
3977   PetscFunctionReturn(0);
3978 }
3979 
3980 #undef __FUNCT__
3981 #define __FUNCT__ "MatFileSplit"
3982 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3983 {
3984   PetscErrorCode    ierr;
3985   PetscMPIInt       rank;
3986   PetscInt          m,N,i,rstart,nnz;
3987   size_t            len;
3988   const PetscInt    *indx;
3989   PetscViewer       out;
3990   char              *name;
3991   Mat               B;
3992   const PetscScalar *values;
3993 
3994   PetscFunctionBegin;
3995   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3996   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3997   /* Should this be the type of the diagonal block of A? */
3998   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3999   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4000   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4001   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4002   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4003   for (i=0;i<m;i++) {
4004     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4005     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4006     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4007   }
4008   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4009   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4010 
4011   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4012   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4013   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for ".<rank>" even for large ranks */
4014   sprintf(name,"%s.%d",outfile,rank);
4015   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4016   ierr = PetscFree(name);CHKERRQ(ierr);
4017   ierr = MatView(B,out);CHKERRQ(ierr);
4018   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4019   ierr = MatDestroy(B);CHKERRQ(ierr);
4020   PetscFunctionReturn(0);
4021 }
4022 
4023 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4024 #undef __FUNCT__
4025 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4026 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4027 {
4028   PetscErrorCode       ierr;
4029   Mat_Merge_SeqsToMPI  *merge;
4030   PetscContainer       container;
4031 
4032   PetscFunctionBegin;
4033   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4034   if (container) {
4035     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4036     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4037     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4038     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4039     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4040     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4041     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4042     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4043     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4044     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4045     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4046     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4047     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4048     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4049 
4050     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4051     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4052     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container existed */
4053   }
4054 
4055   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4056   PetscFunctionReturn(0);
4057 }
4058 
4059 #include "../src/mat/utils/freespace.h"
4060 #include "petscbt.h"
4061 
4062 #undef __FUNCT__
4063 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4064 /*@C
4065       MatMerge_SeqsToMPINumeric - Fills in the numerical values of an MPIAIJ matrix
4066                  created by MatMerge_SeqsToMPISymbolic(), adding together the
4067                  sequential matrices from each process
4068 
4069     Collective on Mat
4070 
4071    Input Parameters:
4072 +    seqmat - the sequential matrix on this process
4073 -    mpimat - the parallel matrix created by MatMerge_SeqsToMPISymbolic()
4074 
4075    Output Parameter:
4076 .    mpimat - the parallel matrix with its numerical values assembled
4079 
4080     Level: advanced
4081 
4082    Notes:
4083      The dimensions of the sequential matrix on each process MUST be the same.
4084      The supporting merge data is attached to mpimat in a PetscContainer composed under the
4085      name "MatMergeSeqsToMPI" and is freed when mpimat is destroyed; call PetscObjectQuery() to access it.
4086 @*/
4087 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4088 {
4089   PetscErrorCode       ierr;
4090   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4091   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4092   PetscMPIInt          size,rank,taga,*len_s;
4093   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4094   PetscInt             proc,m;
4095   PetscInt             **buf_ri,**buf_rj;
4096   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4097   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4098   MPI_Request          *s_waits,*r_waits;
4099   MPI_Status           *status;
4100   MatScalar            *aa=a->a;
4101   MatScalar            **abuf_r,*ba_i;
4102   Mat_Merge_SeqsToMPI  *merge;
4103   PetscContainer       container;
4104 
4105   PetscFunctionBegin;
4106   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4107 
4108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4110 
4111   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4112   if (container) {
4113     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4114   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created by MatMerge_SeqsToMPISymbolic()");
4115   bi     = merge->bi;
4116   bj     = merge->bj;
4117   buf_ri = merge->buf_ri;
4118   buf_rj = merge->buf_rj;
4119 
4120   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4121   owners = merge->rowmap->range;
4122   len_s  = merge->len_s;
4123 
4124   /* send and recv matrix values */
4125   /*-----------------------------*/
4126   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4127   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4128 
4129   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4130   for (proc=0,k=0; proc<size; proc++){
4131     if (!len_s[proc]) continue;
4132     i = owners[proc];
4133     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4134     k++;
4135   }
4136 
4137   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4138   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4139   ierr = PetscFree(status);CHKERRQ(ierr);
4140 
4141   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4142   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4143 
4144   /* insert mat values of mpimat */
4145   /*----------------------------*/
4146   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4147   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4148 
4149   for (k=0; k<merge->nrecv; k++){
4150     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4151     nrows = *(buf_ri_k[k]);
4152     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
4153     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4154   }
4155 
4156   /* set values of ba */
4157   m = merge->rowmap->n;
4158   for (i=0; i<m; i++) {
4159     arow = owners[rank] + i;
4160     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4161     bnzi = bi[i+1] - bi[i];
4162     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4163 
4164     /* add local non-zero vals of this proc's seqmat into ba */
4165     anzi = ai[arow+1] - ai[arow];
4166     aj   = a->j + ai[arow];
4167     aa   = a->a + ai[arow];
4168     nextaj = 0;
4169     for (j=0; nextaj<anzi; j++){
4170       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4171         ba_i[j] += aa[nextaj++];
4172       }
4173     }
4174 
4175     /* add received vals into ba */
4176     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4177       /* i-th row */
4178       if (i == *nextrow[k]) {
4179         anzi = *(nextai[k]+1) - *nextai[k];
4180         aj   = buf_rj[k] + *(nextai[k]);
4181         aa   = abuf_r[k] + *(nextai[k]);
4182         nextaj = 0;
4183         for (j=0; nextaj<anzi; j++){
4184           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4185             ba_i[j] += aa[nextaj++];
4186           }
4187         }
4188         nextrow[k]++; nextai[k]++;
4189       }
4190     }
4191     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4192   }
4193   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4194   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4195 
4196   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4197   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4198   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4199   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4200   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4201   PetscFunctionReturn(0);
4202 }
4203 
4204 #undef __FUNCT__
4205 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4206 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4207 {
4208   PetscErrorCode       ierr;
4209   Mat                  B_mpi;
4210   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4211   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4212   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4213   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4214   PetscInt             len,proc,*dnz,*onz;
4215   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4216   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4217   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4218   MPI_Status           *status;
4219   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4220   PetscBT              lnkbt;
4221   Mat_Merge_SeqsToMPI  *merge;
4222   PetscContainer       container;
4223 
4224   PetscFunctionBegin;
4225   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4226 
4227   /* make sure it is a PETSc comm */
4228   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4229   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4230   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4231 
4232   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4233   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4234 
4235   /* determine row ownership */
4236   /*---------------------------------------------------------*/
4237   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4238   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4239   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4240   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4241   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4242   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4243   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4244 
4245   m      = merge->rowmap->n;
4246   M      = merge->rowmap->N;
4247   owners = merge->rowmap->range;
4248 
4249   /* determine the number of messages to send, their lengths */
4250   /*---------------------------------------------------------*/
4251   len_s  = merge->len_s;
4252 
4253   len = 0;  /* length of buf_si[] */
4254   merge->nsend = 0;
4255   for (proc=0; proc<size; proc++){
4256     len_si[proc] = 0;
4257     if (proc == rank){
4258       len_s[proc] = 0;
4259     } else {
4260       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4261       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4262     }
4263     if (len_s[proc]) {
4264       merge->nsend++;
4265       nrows = 0;
4266       for (i=owners[proc]; i<owners[proc+1]; i++){
4267         if (ai[i+1] > ai[i]) nrows++;
4268       }
4269       len_si[proc] = 2*(nrows+1);
4270       len += len_si[proc];
4271     }
4272   }
4273 
4274   /* determine the number and length of messages to receive for ij-structure */
4275   /*-------------------------------------------------------------------------*/
4276   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4277   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4278 
4279   /* post the Irecv of j-structure */
4280   /*-------------------------------*/
4281   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4282   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4283 
4284   /* post the Isend of j-structure */
4285   /*--------------------------------*/
4286   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4287 
4288   for (proc=0, k=0; proc<size; proc++){
4289     if (!len_s[proc]) continue;
4290     i = owners[proc];
4291     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4292     k++;
4293   }
4294 
4295   /* receives and sends of j-structure are complete */
4296   /*------------------------------------------------*/
4297   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4298   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4299 
4300   /* send and recv i-structure */
4301   /*---------------------------*/
4302   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4303   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4304 
4305   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4306   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4307   for (proc=0,k=0; proc<size; proc++){
4308     if (!len_s[proc]) continue;
4309     /* form outgoing message for i-structure:
4310          buf_si[0]:                 nrows to be sent
4311                [1:nrows]:           row index (global)
4312                [nrows+1:2*nrows+1]: i-structure index
4313     */
4314     /*-------------------------------------------*/
4315     nrows = len_si[proc]/2 - 1;
4316     buf_si_i    = buf_si + nrows+1;
4317     buf_si[0]   = nrows;
4318     buf_si_i[0] = 0;
4319     nrows = 0;
4320     for (i=owners[proc]; i<owners[proc+1]; i++){
4321       anzi = ai[i+1] - ai[i];
4322       if (anzi) {
4323         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4324         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4325         nrows++;
4326       }
4327     }
4328     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4329     k++;
4330     buf_si += len_si[proc];
4331   }
4332 
4333   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4334   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4335 
4336   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4337   for (i=0; i<merge->nrecv; i++){
4338     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4339   }
4340 
4341   ierr = PetscFree(len_si);CHKERRQ(ierr);
4342   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4343   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4344   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4345   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4346   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4347   ierr = PetscFree(status);CHKERRQ(ierr);
4348 
4349   /* compute a local seq matrix in each processor */
4350   /*----------------------------------------------*/
4351   /* allocate bi array and free space for accumulating nonzero column info */
4352   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4353   bi[0] = 0;
4354 
4355   /* create and initialize a linked list */
4356   nlnk = N+1;
4357   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4358 
4359   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4360   len  = ai[owners[rank+1]] - ai[owners[rank]]; /* number of local nonzeros in seqmat */
4362   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4363   current_space = free_space;
4364 
4365   /* determine symbolic info for each local row */
4366   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4367 
4368   for (k=0; k<merge->nrecv; k++){
4369     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4370     nrows = *buf_ri_k[k];
4371     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
4372     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4373   }
4374 
4375   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4376   len = 0;
4377   for (i=0;i<m;i++) {
4378     bnzi   = 0;
4379     /* add local non-zero cols of this proc's seqmat into lnk */
4380     arow   = owners[rank] + i;
4381     anzi   = ai[arow+1] - ai[arow];
4382     aj     = a->j + ai[arow];
4383     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4384     bnzi += nlnk;
4385     /* add received col data into lnk */
4386     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4387       if (i == *nextrow[k]) { /* i-th row */
4388         anzi = *(nextai[k]+1) - *nextai[k];
4389         aj   = buf_rj[k] + *nextai[k];
4390         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4391         bnzi += nlnk;
4392         nextrow[k]++; nextai[k]++;
4393       }
4394     }
4395     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4396 
4397     /* if free space is not available, make more free space */
4398     if (current_space->local_remaining<bnzi) {
4399       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4400       nspacedouble++;
4401     }
4402     /* copy data into free space, then initialize lnk */
4403     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4404     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4405 
4406     current_space->array           += bnzi;
4407     current_space->local_used      += bnzi;
4408     current_space->local_remaining -= bnzi;
4409 
4410     bi[i+1] = bi[i] + bnzi;
4411   }
4412 
4413   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4414 
4415   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4416   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4417   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4418 
4419   /* create symbolic parallel matrix B_mpi */
4420   /*---------------------------------------*/
4421   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4422   if (n==PETSC_DECIDE) {
4423     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4424   } else {
4425     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4426   }
4427   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4428   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4429   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4430 
4431   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4432   B_mpi->assembled     = PETSC_FALSE;
4433   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4434   merge->bi            = bi;
4435   merge->bj            = bj;
4436   merge->buf_ri        = buf_ri;
4437   merge->buf_rj        = buf_rj;
4438   merge->coi           = PETSC_NULL;
4439   merge->coj           = PETSC_NULL;
4440   merge->owners_co     = PETSC_NULL;
4441 
4442   /* attach the supporting struct to B_mpi for reuse */
4443   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4444   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4445   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4446   *mpimat = B_mpi;
4447 
4448   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4449   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatMerge_SeqsToMPI"
4455 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4456 {
4457   PetscErrorCode   ierr;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4461   if (scall == MAT_INITIAL_MATRIX){
4462     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4463   }
4464   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4465   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4466   PetscFunctionReturn(0);
4467 }
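/*
   Example usage of the two-phase merge (a minimal sketch; the assembly of seq is
   illustrative):

     Mat seq,par;
     ... each process assembles a SeqAIJ matrix seq of the same global dimensions ...
     ierr = MatMerge_SeqsToMPI(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     ... to recompute par after changing the numerical values (but not the pattern) of seq: ...
     ierr = MatMerge_SeqsToMPI(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
*/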
4468 
4469 #undef __FUNCT__
4470 #define __FUNCT__ "MatGetLocalMat"
4471 /*@
4472      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A, with the diagonal and off-diagonal parts merged
4473 
4474     Not Collective
4475 
4476    Input Parameters:
4477 +    A - the matrix
4478 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4479 
4480    Output Parameter:
4481 .    A_loc - the local sequential matrix generated
4482 
4483     Level: developer
4484 
4485 @*/
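/*
   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... later, refresh the numerical values without reallocating: ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
*/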
4486 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4487 {
4488   PetscErrorCode  ierr;
4489   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4490   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4491   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4492   MatScalar       *aa=a->a,*ba=b->a,*cam;
4493   PetscScalar     *ca;
4494   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4495   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4496 
4497   PetscFunctionBegin;
4498   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4499   if (scall == MAT_INITIAL_MATRIX){
4500     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4501     ci[0] = 0;
4502     for (i=0; i<am; i++){
4503       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4504     }
4505     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4506     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4507     k = 0;
4508     for (i=0; i<am; i++) {
4509       ncols_o = bi[i+1] - bi[i];
4510       ncols_d = ai[i+1] - ai[i];
4511       /* off-diagonal portion of A: columns to the left of the diagonal block */
4512       for (jo=0; jo<ncols_o; jo++) {
4513         col = cmap[*bj];
4514         if (col >= cstart) break;
4515         cj[k]   = col; bj++;
4516         ca[k++] = *ba++;
4517       }
4518       /* diagonal portion of A */
4519       for (j=0; j<ncols_d; j++) {
4520         cj[k]   = cstart + *aj++;
4521         ca[k++] = *aa++;
4522       }
4523       /* off-diagonal portion of A: columns to the right of the diagonal block */
4524       for (j=jo; j<ncols_o; j++) {
4525         cj[k]   = cmap[*bj++];
4526         ca[k++] = *ba++;
4527       }
4528     }
4529     /* put together the new matrix */
4530     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4531     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4532     /* Since these are PETSc arrays, change flags to free them as necessary. */
4533     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4534     mat->free_a  = PETSC_TRUE;
4535     mat->free_ij = PETSC_TRUE;
4536     mat->nonew   = 0;
4537   } else if (scall == MAT_REUSE_MATRIX){
4538     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4539     ci = mat->i; cj = mat->j; cam = mat->a;
4540     for (i=0; i<am; i++) {
4541       /* off-diagonal portion of A: columns to the left of the diagonal block */
4542       ncols_o = bi[i+1] - bi[i];
4543       for (jo=0; jo<ncols_o; jo++) {
4544         col = cmap[*bj];
4545         if (col >= cstart) break;
4546         *cam++ = *ba++; bj++;
4547       }
4548       /* diagonal portion of A */
4549       ncols_d = ai[i+1] - ai[i];
4550       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4551       /* off-diagonal portion of A: columns to the right of the diagonal block */
4552       for (j=jo; j<ncols_o; j++) {
4553         *cam++ = *ba++; bj++;
4554       }
4555     }
4556   } else {
4557     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4558   }
4559 
4560   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 #undef __FUNCT__
4565 #define __FUNCT__ "MatGetLocalMatCondensed"
4566 /*@C
4567      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and only its NON-ZERO columns
4568 
4569     Not Collective
4570 
4571    Input Parameters:
4572 +    A - the matrix
4573 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4574 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4575 
4576    Output Parameter:
4577 .    A_loc - the local sequential matrix generated
4578 
4579     Level: developer
4580 
4581 @*/
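/*
   Example usage (a minimal sketch; passing PETSC_NULL for row and col selects all local
   rows and determines the nonzero columns automatically):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/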
4582 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4583 {
4584   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4585   PetscErrorCode    ierr;
4586   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4587   IS                isrowa,iscola;
4588   Mat               *aloc;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4592   if (!row){
4593     start = A->rmap->rstart; end = A->rmap->rend;
4594     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4595   } else {
4596     isrowa = *row;
4597   }
4598   if (!col){
4599     start = A->cmap->rstart;
4600     cmap  = a->garray;
4601     nzA   = a->A->cmap->n;
4602     nzB   = a->B->cmap->n;
4603     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4604     ncols = 0;
4605     for (i=0; i<nzB; i++) {
4606       if (cmap[i] < start) idx[ncols++] = cmap[i];
4607       else break;
4608     }
4609     imark = i;
4610     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4611     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4612     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4613     ierr = PetscFree(idx);CHKERRQ(ierr);
4614   } else {
4615     iscola = *col;
4616   }
4617   if (scall != MAT_INITIAL_MATRIX){
4618     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4619     aloc[0] = *A_loc;
4620   }
4621   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4622   *A_loc = aloc[0];
4623   ierr = PetscFree(aloc);CHKERRQ(ierr);
4624   if (!row){
4625     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4626   }
4627   if (!col){
4628     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4629   }
4630   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4631   PetscFunctionReturn(0);
4632 }
4633 
4634 #undef __FUNCT__
4635 #define __FUNCT__ "MatGetBrowsOfAcols"
4636 /*@C
4637     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4638 
4639     Collective on Mat
4640 
4641    Input Parameters:
4642 +    A,B - the matrices in mpiaij format
4643 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4644 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4645 
4646    Output Parameter:
4647 +    rowb, colb - index sets of rows and columns of B to extract
4648 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4649 -    B_seq - the sequential matrix generated
4650 
4651     Level: developer
4652 
4653 @*/
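/*
   Example usage (a minimal sketch; rowb, colb, and brstart must be kept for a later
   MAT_REUSE_MATRIX call):

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/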
4654 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4655 {
4656   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4657   PetscErrorCode    ierr;
4658   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4659   IS                isrowb,iscolb;
4660   Mat               *bseq;
4661 
4662   PetscFunctionBegin;
4663   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4664     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4665   }
4666   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4667 
4668   if (scall == MAT_INITIAL_MATRIX){
4669     start = A->cmap->rstart;
4670     cmap  = a->garray;
4671     nzA   = a->A->cmap->n;
4672     nzB   = a->B->cmap->n;
4673     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4674     ncols = 0;
4675     for (i=0; i<nzB; i++) {  /* row < local row index */
4676       if (cmap[i] < start) idx[ncols++] = cmap[i];
4677       else break;
4678     }
4679     imark = i;
4680     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4681     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4682     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4683     ierr = PetscFree(idx);CHKERRQ(ierr);
4684     *brstart = imark;
4685     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4686   } else {
4687     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4688     isrowb = *rowb; iscolb = *colb;
4689     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4690     bseq[0] = *B_seq;
4691   }
4692   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4693   *B_seq = bseq[0];
4694   ierr = PetscFree(bseq);CHKERRQ(ierr);
4695   if (!rowb){
4696     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4697   } else {
4698     *rowb = isrowb;
4699   }
4700   if (!colb){
4701     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4702   } else {
4703     *colb = iscolb;
4704   }
4705   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4706   PetscFunctionReturn(0);
4707 }
4708 
4709 #undef __FUNCT__
4710 #define __FUNCT__ "MatGetBrowsOfAoCols"
4711 /*@C
4712     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4713     of the OFF-DIAGONAL portion of the local part of A
4714 
4715     Collective on Mat
4716 
4717    Input Parameters:
4718 +    A,B - the matrices in mpiaij format
4719 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4720 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4721 .    startsj_r - similar to startsj for receives
4722 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4723 
4724    Output Parameter:
4725 .    B_oth - the sequential matrix generated
4726 
4727     Level: developer
4728 
4729 @*/
4730 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4731 {
4732   VecScatter_MPI_General *gen_to,*gen_from;
4733   PetscErrorCode         ierr;
4734   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4735   Mat_SeqAIJ             *b_oth;
4736   VecScatter             ctx=a->Mvctx;
4737   MPI_Comm               comm=((PetscObject)ctx)->comm;
4738   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4739   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4740   PetscScalar            *rvalues,*svalues;
4741   MatScalar              *b_otha,*bufa,*bufA;
4742   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4743   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4744   MPI_Status             *sstatus,rstatus;
4745   PetscMPIInt            jj;
4746   PetscInt               *cols,sbs,rbs;
4747   PetscScalar            *vals;
4748 
4749   PetscFunctionBegin;
4750   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4751     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4752   }
4753   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4754   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4755 
4756   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4757   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4758   rvalues  = gen_from->values; /* scalar buffer reused here to receive the row lengths */
4759   svalues  = gen_to->values;   /* scalar buffer reused here to send the row lengths */
4760   nrecvs   = gen_from->n;
4761   nsends   = gen_to->n;
4762 
4763   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4764   srow     = gen_to->indices;   /* local row index to be sent */
4765   sstarts  = gen_to->starts;
4766   sprocs   = gen_to->procs;
4767   sstatus  = gen_to->sstatus;
4768   sbs      = gen_to->bs;
4769   rstarts  = gen_from->starts;
4770   rprocs   = gen_from->procs;
4771   rbs      = gen_from->bs;
4772 
4773   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
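  /* The exchange proceeds in three rounds: (1) row lengths (i-structure), (2) column
     indices (j-array), (3) numerical values (a-array).  With MAT_REUSE_MATRIX only
     round (3) is performed, reusing the index data saved in startsj/startsj_r. */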
4774   if (scall == MAT_INITIAL_MATRIX){
4775     /* i-array */
4776     /*---------*/
4777     /*  post receives */
4778     for (i=0; i<nrecvs; i++){
4779       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4780       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4781       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4782     }
4783 
4784     /* pack the outgoing message */
4785     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4786     sstartsj[0] = 0;  rstartsj[0] = 0;
4787     len = 0; /* total length of j or a array to be sent */
4788     k = 0;
4789     for (i=0; i<nsends; i++){
4790       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4791       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4792       for (j=0; j<nrows; j++) {
4793         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4794         for (l=0; l<sbs; l++){
4795           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4796           rowlen[j*sbs+l] = ncols;
4797           len += ncols;
4798           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4799         }
4800         k++;
4801       }
4802       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4803       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4804     }
4805     /* recvs and sends of i-array are completed */
4806     i = nrecvs;
4807     while (i--) {
4808       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4809     }
4810     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4811 
4812     /* allocate buffers for sending j and a arrays */
4813     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4814     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4815 
4816     /* create i-array of B_oth */
4817     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4818     b_othi[0] = 0;
4819     len = 0; /* total length of j or a array to be received */
4820     k = 0;
4821     for (i=0; i<nrecvs; i++){
4822       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4823       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4824       for (j=0; j<nrows; j++) {
4825         b_othi[k+1] = b_othi[k] + rowlen[j];
4826         len += rowlen[j]; k++;
4827       }
4828       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4829     }
4830 
4831     /* allocate space for the j and a arrays of B_oth */
4832     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4833     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4834 
4835     /* j-array */
4836     /*---------*/
4837     /*  post receives of j-array */
4838     for (i=0; i<nrecvs; i++){
4839       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4840       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4841     }
4842 
4843     /* pack the outgoing message j-array */
4844     k = 0;
4845     for (i=0; i<nsends; i++){
4846       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4847       bufJ = bufj+sstartsj[i];
4848       for (j=0; j<nrows; j++) {
4849         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4850         for (ll=0; ll<sbs; ll++){
4851           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4852           for (l=0; l<ncols; l++){
4853             *bufJ++ = cols[l];
4854           }
4855           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4856         }
4857       }
4858       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4859     }
4860 
4861     /* recvs and sends of j-array are completed */
4862     i = nrecvs;
4863     while (i--) {
4864       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4865     }
4866     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4867   } else if (scall == MAT_REUSE_MATRIX){
4868     sstartsj = *startsj;
4869     rstartsj = *startsj_r;
4870     bufa     = *bufa_ptr;
4871     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4872     b_otha   = b_oth->a;
4873   } else {
4874     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse value");
4875   }
4876 
4877   /* a-array */
4878   /*---------*/
4879   /*  post receives of a-array */
4880   for (i=0; i<nrecvs; i++){
4881     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4882     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4883   }
4884 
4885   /* pack the outgoing message a-array */
4886   k = 0;
4887   for (i=0; i<nsends; i++){
4888     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4889     bufA = bufa+sstartsj[i];
4890     for (j=0; j<nrows; j++) {
4891       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4892       for (ll=0; ll<sbs; ll++){
4893         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4894         for (l=0; l<ncols; l++){
4895           *bufA++ = vals[l];
4896         }
4897         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4898       }
4899     }
4900     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4901   }
4902   /* recvs and sends of a-array are completed */
4903   i = nrecvs;
4904   while (i--) {
4905     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4906   }
4907   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4908   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4909 
4910   if (scall == MAT_INITIAL_MATRIX){
4911     /* put together the new matrix */
4912     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4913 
4914     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4915     /* Since these are PETSc arrays, change flags to free them as necessary. */
4916     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4917     b_oth->free_a  = PETSC_TRUE;
4918     b_oth->free_ij = PETSC_TRUE;
4919     b_oth->nonew   = 0;
4920 
4921     ierr = PetscFree(bufj);CHKERRQ(ierr);
4922     if (!startsj || !bufa_ptr){
4923       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4924       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself; bufa_ptr is the (possibly NULL) output argument */
4925     } else {
4926       *startsj   = sstartsj;
4927       *startsj_r = rstartsj;
4928       *bufa_ptr  = bufa;
4929     }
4930   }
4931   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 #undef __FUNCT__
4936 #define __FUNCT__ "MatGetCommunicationStructs"
4937 /*@C
4938   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4939 
4940   Not Collective
4941 
4942   Input Parameter:
4943 . A - The matrix in mpiaij format
4944 
4945   Output Parameters:
4946 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4947 . colmap - A map from global column index to local index into lvec
4948 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4949 
4950   Level: developer
4951 
4952 @*/
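/*
   Example usage (a minimal sketch for a build without PETSC_USE_CTABLE, where colmap
   is a plain PetscInt array):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter sc;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sc);CHKERRQ(ierr);
*/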
4953 #if defined (PETSC_USE_CTABLE)
4954 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4955 #else
4956 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4957 #endif
4958 {
4959   Mat_MPIAIJ *a;
4960 
4961   PetscFunctionBegin;
4962   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4963   PetscValidPointer(lvec, 2);
4964   PetscValidPointer(colmap, 3);
4965   PetscValidPointer(multScatter, 4);
4966   a = (Mat_MPIAIJ *) A->data;
4967   if (lvec) *lvec = a->lvec;
4968   if (colmap) *colmap = a->colmap;
4969   if (multScatter) *multScatter = a->Mvctx;
4970   PetscFunctionReturn(0);
4971 }
4972 
4973 EXTERN_C_BEGIN
4974 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4975 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4976 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4977 EXTERN_C_END
4978 
4979 #undef __FUNCT__
4980 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4981 /*
4982     Computes (B'*A')', which equals A*B, since computing A*B directly is untenable
4983 
4984                n                       p                          p
4985         (              )       (              )         (                  )
4986       m (      A       )  *  n (       B      )   =   m (         C        )
4987         (              )       (              )         (                  )
4988 
4989 */
4990 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4991 {
4992   PetscErrorCode     ierr;
4993   Mat                At,Bt,Ct;
4994 
4995   PetscFunctionBegin;
4996   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4997   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4998   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4999   ierr = MatDestroy(At);CHKERRQ(ierr);
5000   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5001   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5002   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5003   PetscFunctionReturn(0);
5004 }
5005 
5006 #undef __FUNCT__
5007 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5008 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5009 {
5010   PetscErrorCode ierr;
5011   PetscInt       m=A->rmap->n,n=B->cmap->n;
5012   Mat            Cmat;
5013 
5014   PetscFunctionBegin;
5015   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5016   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5017   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5018   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5019   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5020   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5021   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5022   *C   = Cmat;
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 /* ----------------------------------------------------------------*/
5027 #undef __FUNCT__
5028 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5029 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5030 {
5031   PetscErrorCode ierr;
5032 
5033   PetscFunctionBegin;
5034   if (scall == MAT_INITIAL_MATRIX){
5035     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5036   }
5037   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5038   PetscFunctionReturn(0);
5039 }
5040 
5041 EXTERN_C_BEGIN
5042 #if defined(PETSC_HAVE_MUMPS)
5043 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5044 #endif
5045 #if defined(PETSC_HAVE_PASTIX)
5046 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5047 #endif
5048 #if defined(PETSC_HAVE_SUPERLU_DIST)
5049 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5050 #endif
5051 #if defined(PETSC_HAVE_SPOOLES)
5052 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5053 #endif
5054 EXTERN_C_END
5055 
5056 /*MC
5057    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5058 
5059    Options Database Keys:
5060 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5061 
5062   Level: beginner
5063 
5064 .seealso: MatCreateMPIAIJ()
5065 M*/
5066 
5067 EXTERN_C_BEGIN
5068 #undef __FUNCT__
5069 #define __FUNCT__ "MatCreate_MPIAIJ"
5070 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5071 {
5072   Mat_MPIAIJ     *b;
5073   PetscErrorCode ierr;
5074   PetscMPIInt    size;
5075 
5076   PetscFunctionBegin;
5077   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5078 
5079   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5080   B->data         = (void*)b;
5081   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5082   B->rmap->bs     = 1;
5083   B->assembled    = PETSC_FALSE;
5084   B->mapping      = 0;
5085 
5086   B->insertmode   = NOT_SET_VALUES;
5087   b->size         = size;
5088   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5089 
5090   /* build cache for off array entries formed */
5091   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5092   b->donotstash  = PETSC_FALSE;
5093   b->colmap      = 0;
5094   b->garray      = 0;
5095   b->roworiented = PETSC_TRUE;
5096 
5097   /* stuff used for matrix vector multiply */
5098   b->lvec      = PETSC_NULL;
5099   b->Mvctx     = PETSC_NULL;
5100 
5101   /* stuff for MatGetRow() */
5102   b->rowindices   = 0;
5103   b->rowvalues    = 0;
5104   b->getrowactive = PETSC_FALSE;
5105 
5106 #if defined(PETSC_HAVE_SPOOLES)
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5108                                      "MatGetFactor_mpiaij_spooles",
5109                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5110 #endif
5111 #if defined(PETSC_HAVE_MUMPS)
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5113                                      "MatGetFactor_aij_mumps",
5114                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5115 #endif
5116 #if defined(PETSC_HAVE_PASTIX)
5117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5118 					   "MatGetFactor_mpiaij_pastix",
5119 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5120 #endif
5121 #if defined(PETSC_HAVE_SUPERLU_DIST)
5122   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5123                                      "MatGetFactor_mpiaij_superlu_dist",
5124                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5125 #endif
5126   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5127                                      "MatStoreValues_MPIAIJ",
5128                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5129   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5130                                      "MatRetrieveValues_MPIAIJ",
5131                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5132   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5133 				     "MatGetDiagonalBlock_MPIAIJ",
5134                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5135   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5136 				     "MatIsTranspose_MPIAIJ",
5137 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5138   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5139 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5140 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5141   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5142 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5143 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5144   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5145 				     "MatDiagonalScaleLocal_MPIAIJ",
5146 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5147   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5148                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5149                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5150   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5151                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5152                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5153   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5154                                      "MatConvert_MPIAIJ_MPISBAIJ",
5155                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5156   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5157                                      "MatMatMult_MPIDense_MPIAIJ",
5158                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5159   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5160                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5161                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5162   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5163                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5164                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5165   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5166   PetscFunctionReturn(0);
5167 }
5168 EXTERN_C_END
5169 
5170 #undef __FUNCT__
5171 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5172 /*@
5173      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5174          and "off-diagonal" part of the matrix in CSR format.
5175 
5176    Collective on MPI_Comm
5177 
5178    Input Parameters:
5179 +  comm - MPI communicator
5180 .  m - number of local rows (Cannot be PETSC_DECIDE)
5181 .  n - This value should be the same as the local size used in creating the
5182        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5183        calculated if N is given). For square matrices n is almost always m.
5184 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5185 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5186 .   i - row indices for "diagonal" portion of matrix
5187 .   j - column indices
5188 .   a - matrix values
5189 .   oi - row indices for "off-diagonal" portion of matrix
5190 .   oj - column indices
5191 -   oa - matrix values
5192 
5193    Output Parameter:
5194 .   mat - the matrix
5195 
5196    Level: advanced
5197 
5198    Notes:
5199        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5200 
5201        The i and j indices are 0 based
5202 
5203        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5204 
5205        This sets local rows and cannot be used to set off-processor values.
5206 
5207        You cannot later use MatSetValues() to change values in this matrix.
5208 
5209 .keywords: matrix, aij, compressed row, sparse, parallel
5210 
5211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5212           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5213 @*/
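/*
   Example (a minimal sketch for the process that owns global rows 0 and 1 of a matrix with
   two local columns; the values are illustrative only). Since this routine creates the
   off-diagonal block with the full global column width, oj[] holds GLOBAL column indices,
   while j[] holds column indices local to the diagonal block:

     PetscInt    i[]  = {0,1,2};        CSR row starts of the "diagonal" block
     PetscInt    j[]  = {0,1};          local column indices of the diagonal block
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1};        CSR row starts of the "off-diagonal" block
     PetscInt    oj[] = {2};            global column index outside this process's block
     PetscScalar oa[] = {3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/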
5214 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5215 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5216 {
5217   PetscErrorCode ierr;
5218   Mat_MPIAIJ     *maij;
5219 
5220   PetscFunctionBegin;
5221   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5222   if (i[0]) {
5223     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5224   }
5225   if (oi[0]) {
5226     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5227   }
5228   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5229   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5230   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5231   maij = (Mat_MPIAIJ*) (*mat)->data;
5232   maij->donotstash     = PETSC_TRUE;
5233   (*mat)->preallocated = PETSC_TRUE;
5234 
5235   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5236   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5237   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5238   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5239 
5240   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5241   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5242 
5243   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5244   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5245   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5246   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5247 
5248   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5249   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5250   PetscFunctionReturn(0);
5251 }
5252 
5253 /*
5254     Special version for direct calls from Fortran
5255 */
5256 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5257 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5258 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5259 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5260 #endif
5261 
5262 /* Change these macros so they can be used in a void function */
5263 #undef CHKERRQ
5264 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5265 #undef SETERRQ2
5266 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5267 #undef SETERRQ
5268 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5269 
5270 EXTERN_C_BEGIN
5271 #undef __FUNCT__
5272 #define __FUNCT__ "matsetvaluesmpiaij_"
5273 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5274 {
5275   Mat             mat = *mmat;
5276   PetscInt        m = *mm, n = *mn;
5277   InsertMode      addv = *maddv;
5278   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5279   PetscScalar     value;
5280   PetscErrorCode  ierr;
5281 
5282   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5283   if (mat->insertmode == NOT_SET_VALUES) {
5284     mat->insertmode = addv;
5285   }
5286 #if defined(PETSC_USE_DEBUG)
5287   else if (mat->insertmode != addv) {
5288     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5289   }
5290 #endif
5291   {
5292   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5293   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5294   PetscTruth      roworiented = aij->roworiented;
5295 
5296   /* Some Variables required in the macro */
5297   Mat             A = aij->A;
5298   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5299   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5300   MatScalar       *aa = a->a;
5301   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5302   Mat             B = aij->B;
5303   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5304   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5305   MatScalar       *ba = b->a;
5306 
5307   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5308   PetscInt        nonew = a->nonew;
5309   MatScalar       *ap1,*ap2;
5310 
5311   PetscFunctionBegin;
5312   for (i=0; i<m; i++) {
5313     if (im[i] < 0) continue;
5314 #if defined(PETSC_USE_DEBUG)
5315     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5316 #endif
5317     if (im[i] >= rstart && im[i] < rend) {
5318       row      = im[i] - rstart;
5319       lastcol1 = -1;
5320       rp1      = aj + ai[row];
5321       ap1      = aa + ai[row];
5322       rmax1    = aimax[row];
5323       nrow1    = ailen[row];
5324       low1     = 0;
5325       high1    = nrow1;
5326       lastcol2 = -1;
5327       rp2      = bj + bi[row];
5328       ap2      = ba + bi[row];
5329       rmax2    = bimax[row];
5330       nrow2    = bilen[row];
5331       low2     = 0;
5332       high2    = nrow2;
5333 
5334       for (j=0; j<n; j++) {
5335         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5336         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5337         if (in[j] >= cstart && in[j] < cend){
5338           col = in[j] - cstart;
5339           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5340         } else if (in[j] < 0) continue;
5341 #if defined(PETSC_USE_DEBUG)
5342         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5343 #endif
5344         else {
5345           if (mat->was_assembled) {
5346             if (!aij->colmap) {
5347               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5348             }
5349 #if defined (PETSC_USE_CTABLE)
5350             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5351 	    col--;
5352 #else
5353             col = aij->colmap[in[j]] - 1;
5354 #endif
5355             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5356               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5357               col =  in[j];
5358               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5359               B = aij->B;
5360               b = (Mat_SeqAIJ*)B->data;
5361               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5362               rp2      = bj + bi[row];
5363               ap2      = ba + bi[row];
5364               rmax2    = bimax[row];
5365               nrow2    = bilen[row];
5366               low2     = 0;
5367               high2    = nrow2;
5368               bm       = aij->B->rmap->n;
5369               ba = b->a;
5370             }
5371           } else col = in[j];
5372           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5373         }
5374       }
5375     } else {
5376       if (!aij->donotstash) {
5377         if (roworiented) {
5378           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5379         } else {
5380           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5381         }
5382       }
5383     }
5384   }}
5385   PetscFunctionReturnVoid();
5386 }
5387 EXTERN_C_END
5388 
5389