xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ac199c897b92e43b66f3d89832efd26e2fbb07b0)
#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.
*/
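/*
   A minimal usage sketch (hypothetical driver code, not part of this file;
   N, m and the matrix entries are assumptions of the example): process 0
   assembles the whole square SeqAIJ matrix, then every process receives its
   m local rows.

     Mat gmat = PETSC_NULL,dist;
     if (!rank) {
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,5,PETSC_NULL,&gmat);CHKERRQ(ierr);
       ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on gmat ...
     }
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... later, to refresh only the numerical values from process 0 ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/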
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
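    /*
       A worked example of the ownership computation above (sizes made up):
       with three processes contributing m = 4, 3 and 5 rows, the Allgather
       yields rowners = {0, 4, 3, 5}, and the running sum turns it into
       rowners = {0, 4, 7, 12}, so e.g. process 1 owns global rows [4,7).
    */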
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable (each
  process stores an order-N integer array) but access is fast.
*/
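/*
   A worked example (illustrative values only): if this process holds
   off-diagonal columns with global indices garray[] = {3, 17, 42}, then
   after this routine runs a lookup of global column 17 yields 1+1 = 2
   (entries are stored shifted by one so that 0 can mean "not present").
   Callers therefore recover the local index as

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   where gcol is a hypothetical global column index; lcol < 0 means the
   column is not present in the off-diagonal part.
*/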
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
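
/*
   Both macros above implement the same sorted-row insertion: a bounded
   binary search narrows [low,high) to at most 5 entries, a linear scan
   locates the column, and on a miss the tail of the row is shifted up by
   one slot. A small illustration (values made up): inserting col = 7 into
   the sorted row {2, 5, 9, 12} scans to _i = 2 (the first entry > 7),
   shifts {9, 12} up, and stores 7 at position 2, giving {2, 5, 7, 9, 12}.
*/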

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
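
/*
   The caller must supply v[] in the storage order used above: for a locally
   owned row, the off-diagonal entries left of the diagonal block come
   first, then the diagonal-block entries, then the remaining off-diagonal
   entries. A hypothetical sketch for one owned global row grow (data made
   up):

     PetscScalar vals[] = { b_left..., a_diag..., b_right... };
     ierr = MatSetValuesRow_MPIAIJ(A,grow,vals);CHKERRQ(ierr);
*/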

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
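
/*
   A minimal usage sketch (hypothetical indices and values): any process may
   set any entry; off-process rows are stashed above and communicated during
   assembly.

     PetscInt    row = 0, col = 5;
     PetscScalar val = 3.14;
     ierr = MatSetValues(C,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/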

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
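
/*
   Only locally owned rows may be queried; a hypothetical sketch:

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar v;
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;                      (any row in [rstart,rend) works)
     ierr = MatGetValues(C,1,&row,1,&col,&v);CHKERRQ(ierr);
*/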

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are using the same mode, INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
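
  /*
     A worked example of the nprocs[] encoding above (sizes made up): with
     three processes, if this process must zero 2 rows owned by process 0
     and 1 row owned by process 2, nprocs becomes {2,1, 0,0, 1,1}:
     nprocs[2*j] counts the rows destined for process j and nprocs[2*j+1]
     is 1 iff a message must be sent there, so nsends = 2 here. PetscMaxSum()
     then hands each process nmax (the longest single message it will
     receive) and nrecvs (how many messages it will receive).
  */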

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering lots of mallocs and
     slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
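
/*
   The multiply above overlaps communication with computation:
   schematically, with x_loc the owned part of x and x_ghost the ghost
   values gathered by the scatter,

     y = A_diag * x_loc + B_offdiag * x_ghost

   where A_diag is a->A and B_offdiag is a->B in the compressed local
   column numbering set up by MatSetUpMultiply_MPIAIJ(). The diagonal-block
   product runs while the scatter of x_ghost is in flight.
*/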

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr); /* was leaked in the original code */

  PetscFunctionReturn(0);
}
EXTERN_C_END
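
/*
   A hypothetical usage sketch: test whether Bmat equals the transpose of
   Amat up to a tolerance (A and B are the caller's assembled matrices).

     PetscTruth f;
     ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);
     if (f) { ... the matrices are transposes of each other ... }
*/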

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs scratch space as large as the largest nz on any process */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
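
/*
   The file written above follows the standard PETSc binary AIJ layout:

     header[4]        = {MAT_FILE_CLASSID, M, N, total nz}
     row lengths      (M PetscInt values, in global row order)
     column indices   (nz PetscInt values, rows concatenated; within a row
                       the order is B-left, A, B-right so columns ascend)
     numerical values (nz PetscScalar values, in the same ordering)
*/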

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
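
/*
   A hypothetical viewing sketch (the file name is made up): dump the matrix
   in the binary format written by MatView_MPIAIJ_Binary() above, which is
   the recommended route for anything large.

     PetscViewer vw;
     ierr = PetscViewerBinaryOpen(((PetscObject)C)->comm,"C.dat",FILE_MODE_WRITE,&vw);CHKERRQ(ierr);
     ierr = MatView(C,vw);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(vw);CHKERRQ(ierr);
*/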
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
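/*
   A minimal usage sketch for the SOR above (assumes A is an assembled MPIAIJ matrix
   and b, x are conforming vectors created elsewhere; error handling abbreviated):
   each process relaxes only its diagonal block, i.e. block Jacobi with SOR inside
   each block.

     ierr = MatSOR(A,b,1.0,
                   (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,1,1,x);CHKERRQ(ierr);
*/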
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
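/*
   A minimal usage sketch (assumes a 2x2 matrix on one process; the routine above
   accepts serial index sets and makes them collective itself):

     IS             rowp,colp;
     Mat            B;
     const PetscInt perm[2] = {1,0};
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,perm,&rowp);CHKERRQ(ierr);
     ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
     ierr = ISDuplicate(rowp,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/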
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
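/*
   A minimal usage sketch (assumes mat is assembled): MAT_LOCAL reports this
   process's A and B blocks combined; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce
   those numbers over the communicator as coded above.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/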
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SPD:
1442     A->spd_set                         = PETSC_TRUE;
1443     A->spd                             = flg;
1444     if (flg) {
1445       A->symmetric                     = PETSC_TRUE;
1446       A->structurally_symmetric        = PETSC_TRUE;
1447       A->symmetric_set                 = PETSC_TRUE;
1448       A->structurally_symmetric_set    = PETSC_TRUE;
1449     }
1450     break;
1451   case MAT_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_STRUCTURALLY_SYMMETRIC:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_HERMITIAN:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   case MAT_SYMMETRY_ETERNAL:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   default:
1464     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
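/*
   A minimal usage sketch: storage-related options are simply forwarded to both
   sequential blocks, so e.g.

     ierr = MatSetOption(A,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   affects insertions into the diagonal and off-diagonal parts alike.
*/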
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatGetRow_MPIAIJ"
1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1477   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p;
1479 
1480   PetscFunctionBegin;
1481   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1482   mat->getrowactive = PETSC_TRUE;
1483 
1484   if (!mat->rowvalues && (idx || v)) {
1485     /*
1486         allocate enough space to hold information from the longest row.
1487     */
1488     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1489     PetscInt   max = 1,tmp;
1490     for (i=0; i<matin->rmap->n; i++) {
1491       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1492       if (max < tmp) { max = tmp; }
1493     }
1494     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1495   }
1496 
1497   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1498   lrow = row - rstart;
1499 
1500   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1501   if (!v)   {pvA = 0; pvB = 0;}
1502   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1503   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1504   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1505   nztot = nzA + nzB;
1506 
1507   cmap  = mat->garray;
1508   if (v  || idx) {
1509     if (nztot) {
1510       /* Sort by increasing column numbers, assuming A and B already sorted */
1511       PetscInt imark = -1;
1512       if (v) {
1513         *v = v_p = mat->rowvalues;
1514         for (i=0; i<nzB; i++) {
1515           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1516           else break;
1517         }
1518         imark = i;
1519         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1520         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1521       }
1522       if (idx) {
1523         *idx = idx_p = mat->rowindices;
1524         if (imark > -1) {
1525           for (i=0; i<imark; i++) {
1526             idx_p[i] = cmap[cworkB[i]];
1527           }
1528         } else {
1529           for (i=0; i<nzB; i++) {
1530             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1531             else break;
1532           }
1533           imark = i;
1534         }
1535         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1536         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1537       }
1538     } else {
1539       if (idx) *idx = 0;
1540       if (v)   *v   = 0;
1541     }
1542   }
1543   *nz = nztot;
1544   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1545   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1554 
1555   PetscFunctionBegin;
1556   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
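/*
   A minimal usage sketch (assumes mat is assembled): only locally owned rows may
   be queried, and each MatGetRow() must be matched by MatRestoreRow() before the
   next call, because both routines share the rowvalues/rowindices work arrays.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/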
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
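/*
   The norms above are assembled from the two local blocks:
     NORM_FROBENIUS = sqrt( sum over all processes of sum_ij |a_ij|^2 )
     NORM_1         = max_j sum_i |a_ij|   (column sums summed across processes)
     NORM_INFINITY  = max_i sum_j |a_ij|   (row sums are local, reduced with MPI_MAX)
   Note that NORM_1 allocates two work arrays of global length mat->cmap->N on
   every process, so it can be memory-intensive for very wide matrices.  A minimal
   usage sketch:

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/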
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
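/*
   A minimal usage sketch: out-of-place and in-place forms; the in-place form
   (MAT_REUSE_MATRIX with *matout == A) is restricted to square matrices by the
   check above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/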
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale  the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
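/*
   A minimal usage sketch: computes mat = diag(ll)*mat*diag(rr); either argument
   may be PETSC_NULL to scale on one side only.  ll must conform to the local rows
   and rr to the local columns of mat.

     Vec l,r;
     ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/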
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1760 {
1761   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIAIJ"
1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1772 {
1773   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1774   Mat            a,b,c,d;
1775   PetscTruth     flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatCopy_MPIAIJ"
1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1793 {
1794   PetscErrorCode ierr;
1795   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1796   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-process part of the matrix a->B,
       the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
       directly on the two parts. If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices. */
1806     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1809     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
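/*
   A minimal usage sketch: Y = a*X + Y.  The structure flag chooses the path above:
   SAME_NONZERO_PATTERN does a direct BLAS axpy on the stored values,
   SUBSET_NONZERO_PATTERN builds (and caches on Y) the x-to-y index map, and
   anything else falls back to MatAXPY_Basic().

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/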
1866 
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
  ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
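/*
   A minimal usage sketch (assumes subcomm was created by the caller, e.g. via the
   PetscSubcomm facility, and mlocal_sub is the local size wanted on that
   subcommunicator):

     Mat C;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call refreshes the numerical values using the communication
   buffers cached in the Mat_Redundant container attached to C.
*/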
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row, so loop over rmap->n */
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
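/*
   The four row-extrema routines above share one pattern: compute the extremum of
   the diagonal and off-diagonal blocks separately, then merge, mapping
   off-diagonal indices through garray back to global column numbers.  A minimal
   usage sketch (m is the number of local rows):

     Vec      v;
     PetscInt *idx;
     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/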
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*119*/0,
2719        0,
2720        0,
2721        0
2722 };
2723 
2724 /* ----------------------------------------------------------------------------------------*/
2725 
2726 EXTERN_C_BEGIN
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 EXTERN_C_END
2740 
2741 EXTERN_C_BEGIN
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2745 {
2746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2751   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 EXTERN_C_END
2755 
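/*
   Usage sketch (illustrative, not part of this file): MatStoreValues()/MatRetrieveValues()
   save and restore the numerical values of a matrix whose nonzero pattern is frozen,
   for example to undo a temporary shift of the diagonal. MatStoreValues() requires that
   new nonzero locations have been disallowed first. The object names below are hypothetical.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);        save the current values
     ierr = MatShift(mat,alpha);CHKERRQ(ierr);        modify and use the matrix
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);     restore the saved values
*/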
2756 EXTERN_C_BEGIN
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763   PetscInt       i;
2764 
2765   PetscFunctionBegin;
2766   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2767   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2768   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2769   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2770 
2771   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2773   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2775   if (d_nnz) {
2776     for (i=0; i<B->rmap->n; i++) {
2777       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2778     }
2779   }
2780   if (o_nnz) {
2781     for (i=0; i<B->rmap->n; i++) {
2782       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2783     }
2784   }
2785   b = (Mat_MPIAIJ*)B->data;
2786 
2787   if (!B->preallocated) {
2788     /* Explicitly create 2 MATSEQAIJ matrices. */
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2791     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2795     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2796     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2797   }
2798 
2799   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2800   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2801   B->preallocated = PETSC_TRUE;
2802   PetscFunctionReturn(0);
2803 }
2804 EXTERN_C_END
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2809 {
2810   Mat            mat;
2811   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   *newmat       = 0;
2816   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2817   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2818   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2819   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2820   a    = (Mat_MPIAIJ*)mat->data;
2821 
2822   mat->factortype    = matin->factortype;
2823   mat->rmap->bs      = matin->rmap->bs;
2824   mat->assembled    = PETSC_TRUE;
2825   mat->insertmode   = NOT_SET_VALUES;
2826   mat->preallocated = PETSC_TRUE;
2827 
2828   a->size           = oldmat->size;
2829   a->rank           = oldmat->rank;
2830   a->donotstash     = oldmat->donotstash;
2831   a->roworiented    = oldmat->roworiented;
2832   a->rowindices     = 0;
2833   a->rowvalues      = 0;
2834   a->getrowactive   = PETSC_FALSE;
2835 
2836   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2838 
2839   if (oldmat->colmap) {
2840 #if defined (PETSC_USE_CTABLE)
2841     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2842 #else
2843     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2845     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2846 #endif
2847   } else a->colmap = 0;
2848   if (oldmat->garray) {
2849     PetscInt len;
2850     len  = oldmat->B->cmap->n;
2851     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2852     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2853     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2854   } else a->garray = 0;
2855 
2856   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2858   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2860   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2862   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2863   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2864   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2865   *newmat = mat;
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 /*
2870     Allows sending/receiving messages larger than 2 gigabytes in a single call
2871 */
2872 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2873 {
2874   int             ierr;
2875   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2876   PetscInt        i,numchunks;
2877   PetscMPIInt     icnt;
2878 
2879   numchunks = cnt/CHUNKSIZE + 1;
2880   for (i=0; i<numchunks; i++) {
2881     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2882     ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
2883     if (type == MPIU_INT) {
2884       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2885     } else if (type == MPIU_SCALAR) {
2886       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2887     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2888   }
2889   return 0;
2890 }
2891 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2892 {
2893   int             ierr;
2894   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2895   MPI_Status      status;
2896   PetscInt        i,numchunks;
2897   PetscMPIInt     icnt;
2898 
2899   numchunks = cnt/CHUNKSIZE + 1;
2900   for (i=0; i<numchunks; i++) {
2901     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2902     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
2903     if (type == MPIU_INT) {
2904       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2905     } else if (type == MPIU_SCALAR) {
2906       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2907     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2908   }
2909   return 0;
2910 }
2911 
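/*
   Example of the chunking arithmetic above (a sketch of the logic, not code): for
   cnt = 600,000,000 entries, numchunks = 600000000/250000000 + 1 = 3, so the
   messages carry icnt = 250,000,000, 250,000,000 and 100,000,000 entries. When cnt
   is an exact multiple of CHUNKSIZE the final message has length zero; the matching
   MPILong_Recv() computes the same lengths, so the zero-length message pairs up
   harmlessly.
*/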
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatLoad_MPIAIJ"
2914 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2915 {
2916   PetscScalar    *vals,*svals;
2917   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2918   PetscErrorCode ierr;
2919   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2920   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2921   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2922   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2923   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2924   int            fd;
2925 
2926   PetscFunctionBegin;
2927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2928   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2929   if (!rank) {
2930     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2931     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2932     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2933   }
2934 
2935   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2936 
2937   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2938   M = header[1]; N = header[2];
2939   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2940   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2941   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2942 
2943   /* If global sizes are set, check if they are consistent with that given in the file */
2944   if (sizesset) {
2945     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2946   }
2947   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2948   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2949 
2950   /* determine ownership of all rows */
2951   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2952   else m = newMat->rmap->n; /* Set by user */
2953 
2954   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2955   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2956 
2957   /* First process needs enough room for process with most rows */
2958   if (!rank) {
2959     mmax       = rowners[1];
2960     for (i=2; i<=size; i++) { /* include the last process's row count */
2961       mmax = PetscMax(mmax,rowners[i]);
2962     }
2963   } else mmax = m;
2964 
2965   rowners[0] = 0;
2966   for (i=2; i<=size; i++) {
2967     rowners[i] += rowners[i-1];
2968   }
2969   rstart = rowners[rank];
2970   rend   = rowners[rank+1];
2971 
2972   /* distribute row lengths to all processors */
2973   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2974   if (!rank) {
2975     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2976     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* must hold the largest per-process chunk */
2977     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2978     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2979     for (j=0; j<m; j++) {
2980       procsnz[0] += ourlens[j];
2981     }
2982     for (i=1; i<size; i++) {
2983       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2984       /* calculate the number of nonzeros on each processor */
2985       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2986         procsnz[i] += rowlengths[j];
2987       }
2988       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2989     }
2990     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2991   } else {
2992     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2993   }
2994 
2995   if (!rank) {
2996     /* determine max buffer needed and allocate it */
2997     maxnz = 0;
2998     for (i=0; i<size; i++) {
2999       maxnz = PetscMax(maxnz,procsnz[i]);
3000     }
3001     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix column indices  */
3004     nz   = procsnz[0];
3005     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3006     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3007 
3008     /* read in everyone else's part and ship it off */
3009     for (i=1; i<size; i++) {
3010       nz     = procsnz[i];
3011       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3012       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3013     }
3014     ierr = PetscFree(cols);CHKERRQ(ierr);
3015   } else {
3016     /* determine buffer space needed for message */
3017     nz = 0;
3018     for (i=0; i<m; i++) {
3019       nz += ourlens[i];
3020     }
3021     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3022 
3023     /* receive message of column indices */
3024     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3025   }
3026 
3027   /* determine column ownership if matrix is not square */
3028   if (N != M) {
3029     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3030     else n = newMat->cmap->n;
3031     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3032     cstart = cend - n;
3033   } else {
3034     cstart = rstart;
3035     cend   = rend;
3036     n      = cend - cstart;
3037   }
3038 
3039   /* loop over local rows, determining number of off diagonal entries */
3040   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3041   jj = 0;
3042   for (i=0; i<m; i++) {
3043     for (j=0; j<ourlens[i]; j++) {
3044       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3045       jj++;
3046     }
3047   }
3048 
3049   for (i=0; i<m; i++) {
3050     ourlens[i] -= offlens[i];
3051   }
3052   if (!sizesset) {
3053     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3054   }
3055   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3056 
3057   for (i=0; i<m; i++) {
3058     ourlens[i] += offlens[i];
3059   }
3060 
3061   if (!rank) {
3062     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3063 
3064     /* read in my part of the matrix numerical values  */
3065     nz   = procsnz[0];
3066     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3067 
3068     /* insert into matrix */
3069     jj      = rstart;
3070     smycols = mycols;
3071     svals   = vals;
3072     for (i=0; i<m; i++) {
3073       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3074       smycols += ourlens[i];
3075       svals   += ourlens[i];
3076       jj++;
3077     }
3078 
3079     /* read in other processors and ship out */
3080     for (i=1; i<size; i++) {
3081       nz     = procsnz[i];
3082       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3083       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3084     }
3085     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3086   } else {
3087     /* receive numeric values */
3088     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3089 
3090     /* receive message of values */
3091     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3092 
3093     /* insert into matrix */
3094     jj      = rstart;
3095     smycols = mycols;
3096     svals   = vals;
3097     for (i=0; i<m; i++) {
3098       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3099       smycols += ourlens[i];
3100       svals   += ourlens[i];
3101       jj++;
3102     }
3103   }
3104   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3105   ierr = PetscFree(vals);CHKERRQ(ierr);
3106   ierr = PetscFree(mycols);CHKERRQ(ierr);
3107   ierr = PetscFree(rowners);CHKERRQ(ierr);
3108 
3109   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
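/*
   Typical driver for the loader above (a sketch; the file name and communicator
   are placeholders). The matrix must be created and given a type before MatLoad()
   dispatches here:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/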
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3116 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3117 {
3118   PetscErrorCode ierr;
3119   IS             iscol_local;
3120   PetscInt       csize;
3121 
3122   PetscFunctionBegin;
3123   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3124   if (call == MAT_REUSE_MATRIX) {
3125     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3126     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3127   } else {
3128     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3129   }
3130   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3131   if (call == MAT_INITIAL_MATRIX) {
3132     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3133     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3134   }
3135   PetscFunctionReturn(0);
3136 }
3137 
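/*
   Usage sketch (illustrative): isrow/iscol are parallel index sets; the local size
   of iscol determines the column layout of the result. Here nlocal and first are
   placeholders for the locally owned piece of the index sets.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(comm,nlocal,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,nlocal,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   Pass MAT_REUSE_MATRIX (with the same index sets) to refill sub after the entries
   of A change; the "ISAllGather" object composed above makes that possible.
*/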
3138 #undef __FUNCT__
3139 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3140 /*
3141     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3142   process, and then the end result obtained by concatenating those local matrices.
3143   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3144 
3145   Note: This requires a sequential iscol with all indices.
3146 */
3147 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3148 {
3149   PetscErrorCode ierr;
3150   PetscMPIInt    rank,size;
3151   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3152   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3153   Mat            *local,M,Mreuse;
3154   MatScalar      *vwork,*aa;
3155   MPI_Comm       comm = ((PetscObject)mat)->comm;
3156   Mat_SeqAIJ     *aij;
3157 
3158 
3159   PetscFunctionBegin;
3160   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3161   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3162 
3163   if (call ==  MAT_REUSE_MATRIX) {
3164     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3165     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3166     local = &Mreuse;
3167     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3168   } else {
3169     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3170     Mreuse = *local;
3171     ierr   = PetscFree(local);CHKERRQ(ierr);
3172   }
3173 
3174   /*
3175       m - number of local rows
3176       n - number of columns (same on all processors)
3177       rstart - first row in new global matrix generated
3178   */
3179   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3180   if (call == MAT_INITIAL_MATRIX) {
3181     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3182     ii  = aij->i;
3183     jj  = aij->j;
3184 
3185     /*
3186         Determine the number of non-zeros in the diagonal and off-diagonal
3187         portions of the matrix in order to do correct preallocation
3188     */
3189 
3190     /* first get start and end of "diagonal" columns */
3191     if (csize == PETSC_DECIDE) {
3192       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3193       if (mglobal == n) { /* square matrix */
3194 	nlocal = m;
3195       } else {
3196         nlocal = n/size + ((n % size) > rank);
3197       }
3198     } else {
3199       nlocal = csize;
3200     }
3201     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3202     rstart = rend - nlocal;
3203     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3204 
3205     /* next, compute all the lengths */
3206     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3207     olens = dlens + m;
3208     for (i=0; i<m; i++) {
3209       jend = ii[i+1] - ii[i];
3210       olen = 0;
3211       dlen = 0;
3212       for (j=0; j<jend; j++) {
3213         if (*jj < rstart || *jj >= rend) olen++;
3214         else dlen++;
3215         jj++;
3216       }
3217       olens[i] = olen;
3218       dlens[i] = dlen;
3219     }
3220     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3221     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3222     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3223     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3224     ierr = PetscFree(dlens);CHKERRQ(ierr);
3225   } else {
3226     PetscInt ml,nl;
3227 
3228     M = *newmat;
3229     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3230     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3231     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3232     /*
3233          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3234        rather than the slower MatSetValues().
3235     */
3236     M->was_assembled = PETSC_TRUE;
3237     M->assembled     = PETSC_FALSE;
3238   }
3239   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3240   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3241   ii  = aij->i;
3242   jj  = aij->j;
3243   aa  = aij->a;
3244   for (i=0; i<m; i++) {
3245     row   = rstart + i;
3246     nz    = ii[i+1] - ii[i];
3247     cwork = jj;     jj += nz;
3248     vwork = aa;     aa += nz;
3249     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3250   }
3251 
3252   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254   *newmat = M;
3255 
3256   /* save submatrix used in processor for next request */
3257   if (call ==  MAT_INITIAL_MATRIX) {
3258     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3259     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3260   }
3261 
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 EXTERN_C_BEGIN
3266 #undef __FUNCT__
3267 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3268 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3269 {
3270   PetscInt       m,cstart, cend,j,nnz,i,d;
3271   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3272   const PetscInt *JJ;
3273   PetscScalar    *values;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3278 
3279   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3280   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3281   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3282   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3283   m      = B->rmap->n;
3284   cstart = B->cmap->rstart;
3285   cend   = B->cmap->rend;
3286   rstart = B->rmap->rstart;
3287 
3288   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3289 
3290 #if defined(PETSC_USE_DEBUG)
3291   for (i=0; i<m; i++) {
3292     nnz     = Ii[i+1]- Ii[i];
3293     JJ      = J + Ii[i];
3294     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3295     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3296     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3297   }
3298 #endif
3299 
3300   for (i=0; i<m; i++) {
3301     nnz     = Ii[i+1]- Ii[i];
3302     JJ      = J + Ii[i];
3303     nnz_max = PetscMax(nnz_max,nnz);
3304     d       = 0;
3305     for (j=0; j<nnz; j++) {
3306       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3307     }
3308     d_nnz[i] = d;
3309     o_nnz[i] = nnz - d;
3310   }
3311   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3312   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3313 
3314   if (v) values = (PetscScalar*)v;
3315   else {
3316     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3317     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3318   }
3319 
3320   for (i=0; i<m; i++) {
3321     ii   = i + rstart;
3322     nnz  = Ii[i+1]- Ii[i];
3323     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3324   }
3325   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327 
3328   if (!v) {
3329     ierr = PetscFree(values);CHKERRQ(ierr);
3330   }
3331   PetscFunctionReturn(0);
3332 }
3333 EXTERN_C_END
3334 
3335 #undef __FUNCT__
3336 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3337 /*@
3338    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3339    (the default parallel PETSc format).
3340 
3341    Collective on MPI_Comm
3342 
3343    Input Parameters:
3344 +  B - the matrix
3345 .  i - the indices into j for the start of each local row (starts with zero)
3346 .  j - the column indices for each local row (starts with zero)
3347 -  v - optional values in the matrix
3348 
3349    Level: developer
3350 
3351    Notes:
3352        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3353      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3354      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3355 
3356        The i and j indices are 0 based, and the i values are offsets into the local j array.
3357 
3358        The format used for the sparse matrix input is equivalent to a
3359     row-major ordering, i.e. for the following matrix the input data expected is
3360     as shown:
3361 
3362         1 0 0
3363         2 0 3     P0
3364        -------
3365         4 5 6     P1
3366 
3367      Process0 [P0]: rows_owned=[0,1]
3368         i =  {0,1,3}  [size = nrow+1  = 2+1]
3369         j =  {0,0,2}  [size = nz = 3]
3370         v =  {1,2,3}  [size = nz = 3]
3371 
3372      Process1 [P1]: rows_owned=[2]
3373         i =  {0,3}    [size = nrow+1  = 1+1]
3374         j =  {0,1,2}  [size = nz = 3]
3375         v =  {4,5,6}  [size = nz = 3]
3376 
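     With this layout, a call on P0 might look like the following (a sketch; B is
     assumed to have already been created with its local sizes and type MATMPIAIJ):
.vb
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve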
3377 .keywords: matrix, aij, compressed row, sparse, parallel
3378 
3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3380           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3381 @*/
3382 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3383 {
3384   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3385 
3386   PetscFunctionBegin;
3387   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3388   if (f) {
3389     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3390   }
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 #undef __FUNCT__
3395 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3396 /*@C
3397    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3398    (the default parallel PETSc format).  For good matrix assembly performance
3399    the user should preallocate the matrix storage by setting the parameters
3400    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3401    performance can be increased by more than a factor of 50.
3402 
3403    Collective on MPI_Comm
3404 
3405    Input Parameters:
3406 +  A - the matrix
3407 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3408            (same value is used for all local rows)
3409 .  d_nnz - array containing the number of nonzeros in the various rows of the
3410            DIAGONAL portion of the local submatrix (possibly different for each row)
3411            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3412            The size of this array is equal to the number of local rows, i.e 'm'.
3413            You must leave room for the diagonal entry even if it is zero.
3414 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3415            submatrix (same value is used for all local rows).
3416 -  o_nnz - array containing the number of nonzeros in the various rows of the
3417            OFF-DIAGONAL portion of the local submatrix (possibly different for
3418            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3419            structure. The size of this array is equal to the number
3420            of local rows, i.e 'm'.
3421 
3422    If the *_nnz parameter is given then the *_nz parameter is ignored
3423 
3424    The AIJ format (also called the Yale sparse matrix format or
3425    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3426    storage.  The stored row and column indices begin with zero.
3427    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3428 
3429    The parallel matrix is partitioned such that the first m0 rows belong to
3430    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3431    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3432 
3433    The DIAGONAL portion of the local submatrix of a processor can be defined
3434    as the submatrix which is obtained by extracting the part corresponding
3435    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3436    first row that belongs to the processor, and r2 is the last row belonging
3437    to this processor. This is a square mxm matrix. The remaining portion
3438    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3439 
3440    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3441 
3442    You can call MatGetInfo() to get information on how effective the preallocation was;
3443    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3444    You can also run with the option -info and look for messages with the string
3445    malloc in them to see if additional memory allocation was needed.
3446 
3447    Example usage:
3448 
3449    Consider the following 8x8 matrix with 34 non-zero values, that is
3450    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3451    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3452    as follows:
3453 
3454 .vb
3455             1  2  0  |  0  3  0  |  0  4
3456     Proc0   0  5  6  |  7  0  0  |  8  0
3457             9  0 10  | 11  0  0  | 12  0
3458     -------------------------------------
3459            13  0 14  | 15 16 17  |  0  0
3460     Proc1   0 18  0  | 19 20 21  |  0  0
3461             0  0  0  | 22 23  0  | 24  0
3462     -------------------------------------
3463     Proc2  25 26 27  |  0  0 28  | 29  0
3464            30  0  0  | 31 32 33  |  0 34
3465 .ve
3466 
3467    This can be represented as a collection of submatrices as:
3468 
3469 .vb
3470       A B C
3471       D E F
3472       G H I
3473 .ve
3474 
3475    Where the submatrices A,B,C are owned by proc0, D,E,F are
3476    owned by proc1, G,H,I are owned by proc2.
3477 
3478    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3479    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3480    The 'M','N' parameters are 8,8, and have the same values on all procs.
3481 
3482    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3483    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3484    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3485    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3486    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3487    matrix, and [DF] as another SeqAIJ matrix.
3488 
3489    When d_nz, o_nz parameters are specified, d_nz storage elements are
3490    allocated for every row of the local diagonal submatrix, and o_nz
3491    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3492    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3493    over the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3494    In this case, the values of d_nz,o_nz are:
3495 .vb
3496      proc0 : dnz = 2, o_nz = 2
3497      proc1 : dnz = 3, o_nz = 2
3498      proc2 : dnz = 1, o_nz = 4
3499 .ve
3500    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3501    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3502    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3503    34 values.
3504 
3505    When d_nnz, o_nnz parameters are specified, the storage is specified
3506    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3507    In the above case the values for d_nnz,o_nnz are:
3508 .vb
3509      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3510      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3511      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3512 .ve
3513    Here the space allocated is the sum of all the above values, i.e. 34, and
3514    hence the preallocation is perfect.
3515 
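   For example, with the layout above proc0 could preallocate as follows (a
   sketch; A is assumed to be an already created MATMPIAIJ matrix):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve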
3516    Level: intermediate
3517 
3518 .keywords: matrix, aij, compressed row, sparse, parallel
3519 
3520 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3521           MPIAIJ, MatGetInfo()
3522 @*/
3523 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3524 {
3525   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3526 
3527   PetscFunctionBegin;
3528   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3529   if (f) {
3530     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3531   }
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 #undef __FUNCT__
3536 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3537 /*@
3538      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3539          CSR format the local rows.
3540 
3541    Collective on MPI_Comm
3542 
3543    Input Parameters:
3544 +  comm - MPI communicator
3545 .  m - number of local rows (Cannot be PETSC_DECIDE)
3546 .  n - This value should be the same as the local size used in creating the
3547        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3548        calculated if N is given) For square matrices n is almost always m.
3549 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3550 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3551 .   i - row indices
3552 .   j - column indices
3553 -   a - matrix values
3554 
3555    Output Parameter:
3556 .   mat - the matrix
3557 
3558    Level: intermediate
3559 
3560    Notes:
3561        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3562      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3563      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3564 
3565        The i and j indices are 0 based, and the i values are offsets into the local j array.
3566 
3567        The format used for the sparse matrix input is equivalent to a
3568     row-major ordering, i.e. for the following matrix the input data expected is
3569     as shown:
3570 
3571         1 0 0
3572         2 0 3     P0
3573        -------
3574         4 5 6     P1
3575 
3576      Process0 [P0]: rows_owned=[0,1]
3577         i =  {0,1,3}  [size = nrow+1  = 2+1]
3578         j =  {0,0,2}  [size = nz = 3]
3579         v =  {1,2,3}  [size = nz = 3]
3580 
3581      Process1 [P1]: rows_owned=[2]
3582         i =  {0,3}    [size = nrow+1  = 1+1]
3583         j =  {0,1,2}  [size = nz = 3]
3584         v =  {4,5,6}  [size = nz = 3]
3585 
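     With this layout, P0 (which owns 2 of the 3 global rows) might call (sketch):
.vb
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve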
3586 .keywords: matrix, aij, compressed row, sparse, parallel
3587 
3588 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3589           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3590 @*/
3591 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3592 {
3593   PetscErrorCode ierr;
3594 
3595   PetscFunctionBegin;
3596   if (i[0]) {
3597     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3598   }
3599   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
3600   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3601   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3602   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3603   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3604   PetscFunctionReturn(0);
3605 }
3606 
3607 #undef __FUNCT__
3608 #define __FUNCT__ "MatCreateMPIAIJ"
3609 /*@C
3610    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3611    (the default parallel PETSc format).  For good matrix assembly performance
3612    the user should preallocate the matrix storage by setting the parameters
3613    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3614    performance can be increased by more than a factor of 50.
3615 
3616    Collective on MPI_Comm
3617 
3618    Input Parameters:
3619 +  comm - MPI communicator
3620 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3621            This value should be the same as the local size used in creating the
3622            y vector for the matrix-vector product y = Ax.
3623 .  n - This value should be the same as the local size used in creating the
3624        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3625        calculated if N is given) For square matrices n is almost always m.
3626 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3627 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3628 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3629            (same value is used for all local rows)
3630 .  d_nnz - array containing the number of nonzeros in the various rows of the
3631            DIAGONAL portion of the local submatrix (possibly different for each row)
3632            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3633            The size of this array is equal to the number of local rows, i.e 'm'.
3634            You must leave room for the diagonal entry even if it is zero.
3635 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3636            submatrix (same value is used for all local rows).
3637 -  o_nnz - array containing the number of nonzeros in the various rows of the
3638            OFF-DIAGONAL portion of the local submatrix (possibly different for
3639            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3640            structure. The size of this array is equal to the number
3641            of local rows, i.e 'm'.
3642 
3643    Output Parameter:
3644 .  A - the matrix
3645 
3646    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3647    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3648    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3649 
3650    Notes:
3651    If the *_nnz parameter is given then the *_nz parameter is ignored
3652 
3653    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3654    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3655    storage requirements for this matrix.
3656 
3657    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3658    processor then it must be used on all processors that share the object for
3659    that argument.
3660 
3661    The user MUST specify either the local or global matrix dimensions
3662    (possibly both).
3663 
3664    The parallel matrix is partitioned across processors such that the
3665    first m0 rows belong to process 0, the next m1 rows belong to
3666    process 1, the next m2 rows belong to process 2, etc., where
3667    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3668    values corresponding to an [m x N] submatrix.
3669 
3670    The columns are logically partitioned with the n0 columns belonging
3671    to the 0th partition, the next n1 columns belonging to the next
3672    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3673 
3674    The DIAGONAL portion of the local submatrix on any given processor
3675    is the submatrix corresponding to the rows and columns m,n
3676    owned by the given processor, i.e. the diagonal matrix on
3677    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3678    etc. The remaining portion of the local submatrix [m x (N-n)]
3679    constitutes the OFF-DIAGONAL portion. The example below better
3680    illustrates this concept.
3681 
3682    For a square global matrix we define each processor's diagonal portion
3683    to be its local rows and the corresponding columns (a square submatrix);
3684    each processor's off-diagonal portion encompasses the remainder of the
3685    local matrix (a rectangular submatrix).
3686 
3687    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3688 
3689    When calling this routine with a single process communicator, a matrix of
3690    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3691    type of communicator, use the construction mechanism:
3692      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3693 
3694    By default, this format uses inodes (identical nodes) when possible.
3695    We search for consecutive rows with the same nonzero structure, thereby
3696    reusing matrix information to achieve increased efficiency.
3697 
3698    Options Database Keys:
3699 +  -mat_no_inode  - Do not use inodes
3700 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3701 -  -mat_aij_oneindex - Internally use indexing starting at 1
3702         rather than 0.  Note that when calling MatSetValues(),
3703         the user still MUST index entries starting at 0!
3704 
3705 
3706    Example usage:
3707 
3708    Consider the following 8x8 matrix with 34 non-zero values, that is
3709    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3710    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3711    as follows:
3712 
3713 .vb
3714             1  2  0  |  0  3  0  |  0  4
3715     Proc0   0  5  6  |  7  0  0  |  8  0
3716             9  0 10  | 11  0  0  | 12  0
3717     -------------------------------------
3718            13  0 14  | 15 16 17  |  0  0
3719     Proc1   0 18  0  | 19 20 21  |  0  0
3720             0  0  0  | 22 23  0  | 24  0
3721     -------------------------------------
3722     Proc2  25 26 27  |  0  0 28  | 29  0
3723            30  0  0  | 31 32 33  |  0 34
3724 .ve
3725 
3726    This can be represented as a collection of submatrices as:
3727 
3728 .vb
3729       A B C
3730       D E F
3731       G H I
3732 .ve
3733 
3734    Where the submatrices A,B,C are owned by proc0, D,E,F are
3735    owned by proc1, G,H,I are owned by proc2.
3736 
3737    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3738    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3739    The 'M','N' parameters are 8,8, and have the same values on all procs.
3740 
3741    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3742    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3743    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3744    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3745    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3746    matrix, and [DF] as another SeqAIJ matrix.
3747 
3748    When d_nz, o_nz parameters are specified, d_nz storage elements are
3749    allocated for every row of the local diagonal submatrix, and o_nz
3750    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3751    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3752    over the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3753    In this case, the values of d_nz,o_nz are:
3754 .vb
3755      proc0 : dnz = 2, o_nz = 2
3756      proc1 : dnz = 3, o_nz = 2
3757      proc2 : dnz = 1, o_nz = 4
3758 .ve
3759    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3760    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3761    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3762    34 values.
3763 
3764    When d_nnz, o_nnz parameters are specified, the storage is specified
3765    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3766    In the above case the values for d_nnz,o_nnz are:
3767 .vb
3768      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3769      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3770      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3771 .ve
3772    Here the space allocated is the sum of all the above values, i.e. 34, and
3773    hence the preallocation is perfect.
3774 
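   For example, proc0 in the layout above could create its share of the matrix
   with (sketch):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve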
3775    Level: intermediate
3776 
3777 .keywords: matrix, aij, compressed row, sparse, parallel
3778 
3779 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3780           MPIAIJ, MatCreateMPIAIJWithArrays()
3781 @*/
3782 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3783 {
3784   PetscErrorCode ierr;
3785   PetscMPIInt    size;
3786 
3787   PetscFunctionBegin;
3788   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3789   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3790   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3791   if (size > 1) {
3792     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3793     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3794   } else {
3795     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3796     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3797   }
3798   PetscFunctionReturn(0);
3799 }
3800 
3801 #undef __FUNCT__
3802 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3803 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3804 {
3805   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3806 
3807   PetscFunctionBegin;
3808   *Ad     = a->A;
3809   *Ao     = a->B;
3810   *colmap = a->garray;
3811   PetscFunctionReturn(0);
3812 }
3813 
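/*
   Usage sketch (illustrative): obtain the diagonal block Ad, the off-diagonal
   block Ao, and the column map of an MPIAIJ matrix; colmap[k] is the global
   column corresponding to local column k of Ao.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/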
3814 #undef __FUNCT__
3815 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3816 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3817 {
3818   PetscErrorCode ierr;
3819   PetscInt       i;
3820   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3821 
3822   PetscFunctionBegin;
3823   if (coloring->ctype == IS_COLORING_GLOBAL) {
3824     ISColoringValue *allcolors,*colors;
3825     ISColoring      ocoloring;
3826 
3827     /* set coloring for diagonal portion */
3828     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3829 
3830     /* set coloring for off-diagonal portion */
3831     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3832     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3833     for (i=0; i<a->B->cmap->n; i++) {
3834       colors[i] = allcolors[a->garray[i]];
3835     }
3836     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3837     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3838     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3839     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3840   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3841     ISColoringValue *colors;
3842     PetscInt        *larray;
3843     ISColoring      ocoloring;
3844 
3845     /* set coloring for diagonal portion */
3846     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3847     for (i=0; i<a->A->cmap->n; i++) {
3848       larray[i] = i + A->cmap->rstart;
3849     }
3850     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3851     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3852     for (i=0; i<a->A->cmap->n; i++) {
3853       colors[i] = coloring->colors[larray[i]];
3854     }
3855     ierr = PetscFree(larray);CHKERRQ(ierr);
3856     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3857     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3858     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3859 
3860     /* set coloring for off-diagonal portion */
3861     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3862     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3863     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3864     for (i=0; i<a->B->cmap->n; i++) {
3865       colors[i] = coloring->colors[larray[i]];
3866     }
3867     ierr = PetscFree(larray);CHKERRQ(ierr);
3868     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3869     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3870     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3871   } else {
3872     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3873   }
3874 
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 #if defined(PETSC_HAVE_ADIC)
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3881 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3882 {
3883   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3884   PetscErrorCode ierr;
3885 
3886   PetscFunctionBegin;
3887   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3888   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3889   PetscFunctionReturn(0);
3890 }
3891 #endif
3892 
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3895 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3896 {
3897   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3898   PetscErrorCode ierr;
3899 
3900   PetscFunctionBegin;
3901   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3902   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3903   PetscFunctionReturn(0);
3904 }
3905 
3906 #undef __FUNCT__
3907 #define __FUNCT__ "MatMerge"
3908 /*@
3909       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3910                  matrices from each processor
3911 
3912     Collective on MPI_Comm
3913 
3914    Input Parameters:
3915 +    comm - the communicator the parallel matrix will live on
3916 .    inmat - the input sequential matrix
3917 .    n - number of local columns (or PETSC_DECIDE)
3918 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3919 
3920    Output Parameter:
3921 .    outmat - the parallel matrix generated
3922 
3923     Level: advanced
3924 
3925    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input matrix inmat is destroyed by this routine.
3926 
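   A minimal call sequence (sketch):
.vb
     Mat seq,par;
     ... assemble the local SeqAIJ matrix seq on each process ...
     ierr = MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
.ve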
3927 @*/
3928 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3929 {
3930   PetscErrorCode ierr;
3931   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3932   PetscInt       *indx;
3933   PetscScalar    *values;
3934 
3935   PetscFunctionBegin;
3936   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3937   if (scall == MAT_INITIAL_MATRIX){
3938     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3939     if (n == PETSC_DECIDE){
3940       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3941     }
3942     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3943     rstart -= m;
3944 
3945     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3946     for (i=0;i<m;i++) {
3947       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3948       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3949       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3950     }
3951     /* This routine will ONLY return MPIAIJ type matrix */
3952     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3953     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3954     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3955     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3956     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3957 
3958   } else if (scall == MAT_REUSE_MATRIX){
3959     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3960   } else {
3961     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3962   }
3963 
3964   for (i=0;i<m;i++) {
3965     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3966     Ii    = i + rstart;
3967     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3968     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3969   }
3970   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3971   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3972   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3973 
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 #undef __FUNCT__
3978 #define __FUNCT__ "MatFileSplit"
3979 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3980 {
3981   PetscErrorCode    ierr;
3982   PetscMPIInt       rank;
3983   PetscInt          m,N,i,rstart,nnz;
3984   size_t            len;
3985   const PetscInt    *indx;
3986   PetscViewer       out;
3987   char              *name;
3988   Mat               B;
3989   const PetscScalar *values;
3990 
3991   PetscFunctionBegin;
3992   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3993   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3994   /* Should this be the type of the diagonal block of A? */
3995   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3996   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3997   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3998   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3999   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4000   for (i=0;i<m;i++) {
4001     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4002     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4003     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4004   }
4005   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4006   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4007 
4008   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4009   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4010   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4011   sprintf(name,"%s.%d",outfile,rank);
4012   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4013   ierr = PetscFree(name);CHKERRQ(ierr);
4014   ierr = MatView(B,out);CHKERRQ(ierr);
4015   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4016   ierr = MatDestroy(B);CHKERRQ(ierr);
4017   PetscFunctionReturn(0);
4018 }
4019 
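/*
   Usage sketch for MatFileSplit() (added; illustrative only).  It writes each
   process's local rows of A as a sequential matrix in its own binary file:

      ierr = MatFileSplit(A,"amat");CHKERRQ(ierr);     writes amat.0, amat.1, ...

   Each piece can later be examined on a single process, e.g. by loading it
   with MatLoad() on PETSC_COMM_SELF.
*/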
4020 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4021 #undef __FUNCT__
4022 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4023 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4024 {
4025   PetscErrorCode       ierr;
4026   Mat_Merge_SeqsToMPI  *merge=PETSC_NULL; /* stays PETSC_NULL if no container is attached */
4027   PetscContainer       container;
4028 
4029   PetscFunctionBegin;
4030   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4031   if (container) {
4032     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4033     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4034     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4035     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4036     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4037     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4038     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4039     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4040     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4041     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4042     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4043     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4044     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4045     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4046 
4047     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4048     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4049   }
4050   ierr = PetscFree(merge);CHKERRQ(ierr);
4051 
4052   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4053   PetscFunctionReturn(0);
4054 }
4055 
4056 #include "../src/mat/utils/freespace.h"
4057 #include "petscbt.h"
4058 
4059 #undef __FUNCT__
4060 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4061 /*@C
4062       MatMerge_SeqsToMPINumeric - Fills the numerical values of an MPIAIJ matrix
4063                  created by MatMerge_SeqsToMPISymbolic(), by adding together the
4064                  sequential matrices contributed by the processors
4065 
4066     Collective on Mat
4067 
4068    Input Parameters:
4069 +    seqmat - this process's input sequential matrix
4070 -    mpimat - the parallel matrix created by MatMerge_SeqsToMPISymbolic()
4071 
4072    Output Parameter:
4073 .    mpimat - the assembled parallel matrix
4074 
4075     Level: advanced
4076 
4077    Notes:
4078      The dimensions of the sequential matrix on each processor MUST be the same.
4079      The supporting data computed by the symbolic phase is stored in the container
4080      "MatMergeSeqsToMPI" composed with mpimat and is destroyed when mpimat is
4081      destroyed; call PetscObjectQuery() to access it.
4082 @*/
4084 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4085 {
4086   PetscErrorCode       ierr;
4087   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4088   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4089   PetscMPIInt          size,rank,taga,*len_s;
4090   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4091   PetscInt             proc,m;
4092   PetscInt             **buf_ri,**buf_rj;
4093   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4094   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4095   MPI_Request          *s_waits,*r_waits;
4096   MPI_Status           *status;
4097   MatScalar            *aa=a->a;
4098   MatScalar            **abuf_r,*ba_i;
4099   Mat_Merge_SeqsToMPI  *merge;
4100   PetscContainer       container;
4101 
4102   PetscFunctionBegin;
4103   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4104 
4105   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4106   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4107 
4108   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4109   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat must have been created by MatMerge_SeqsToMPISymbolic()");
4110   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4112   bi     = merge->bi;
4113   bj     = merge->bj;
4114   buf_ri = merge->buf_ri;
4115   buf_rj = merge->buf_rj;
4116 
4117   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4118   owners = merge->rowmap->range;
4119   len_s  = merge->len_s;
4120 
4121   /* send and recv matrix values */
4122   /*-----------------------------*/
4123   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4124   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4125 
4126   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4127   for (proc=0,k=0; proc<size; proc++){
4128     if (!len_s[proc]) continue;
4129     i = owners[proc];
4130     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4131     k++;
4132   }
4133 
4134   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4135   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4136   ierr = PetscFree(status);CHKERRQ(ierr);
4137 
4138   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4139   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4140 
4141   /* insert mat values of mpimat */
4142   /*----------------------------*/
4143   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4144   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4145 
4146   for (k=0; k<merge->nrecv; k++){
4147     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4148     nrows = *(buf_ri_k[k]);
4149     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4150     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4151   }
4152 
4153   /* set values of ba */
4154   m = merge->rowmap->n;
4155   for (i=0; i<m; i++) {
4156     arow = owners[rank] + i;
4157     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4158     bnzi = bi[i+1] - bi[i];
4159     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4160 
4161     /* add local non-zero vals of this proc's seqmat into ba */
4162     anzi = ai[arow+1] - ai[arow];
4163     aj   = a->j + ai[arow];
4164     aa   = a->a + ai[arow];
4165     nextaj = 0;
4166     for (j=0; nextaj<anzi; j++){
4167       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4168         ba_i[j] += aa[nextaj++];
4169       }
4170     }
4171 
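    /* Note (added): the accumulation loop above is a two-pointer merge; it
       relies on aj (this row of seqmat) and bj_i (this row of mpimat) both
       being sorted by column, with the columns of aj a subset of those of
       bj_i, so nextaj advances only when its column is found in bj_i. */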
4172     /* add received vals into ba */
4173     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4174       /* i-th row */
4175       if (i == *nextrow[k]) {
4176         anzi = *(nextai[k]+1) - *nextai[k];
4177         aj   = buf_rj[k] + *(nextai[k]);
4178         aa   = abuf_r[k] + *(nextai[k]);
4179         nextaj = 0;
4180         for (j=0; nextaj<anzi; j++){
4181           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4182             ba_i[j] += aa[nextaj++];
4183           }
4184         }
4185         nextrow[k]++; nextai[k]++;
4186       }
4187     }
4188     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4189   }
4190   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4191   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4192 
4193   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4194   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4195   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4196   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4197   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4198   PetscFunctionReturn(0);
4199 }
4200 
4201 #undef __FUNCT__
4202 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4203 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4204 {
4205   PetscErrorCode       ierr;
4206   Mat                  B_mpi;
4207   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4208   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4209   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4210   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4211   PetscInt             len,proc,*dnz,*onz;
4212   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4213   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4214   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4215   MPI_Status           *status;
4216   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4217   PetscBT              lnkbt;
4218   Mat_Merge_SeqsToMPI  *merge;
4219   PetscContainer       container;
4220 
4221   PetscFunctionBegin;
4222   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4223 
4224   /* make sure it is a PETSc comm */
4225   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4227   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4228 
4229   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4230   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4231 
4232   /* determine row ownership */
4233   /*---------------------------------------------------------*/
4234   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4235   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4236   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4237   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4238   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4239   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4240   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4241 
4242   m      = merge->rowmap->n;
4243   M      = merge->rowmap->N;
4244   owners = merge->rowmap->range;
4245 
4246   /* determine the number of messages to send, their lengths */
4247   /*---------------------------------------------------------*/
4248   len_s  = merge->len_s;
4249 
4250   len = 0;  /* length of buf_si[] */
4251   merge->nsend = 0;
4252   for (proc=0; proc<size; proc++){
4253     len_si[proc] = 0;
4254     if (proc == rank){
4255       len_s[proc] = 0;
4256     } else {
4257       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4258       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4259     }
4260     if (len_s[proc]) {
4261       merge->nsend++;
4262       nrows = 0;
4263       for (i=owners[proc]; i<owners[proc+1]; i++){
4264         if (ai[i+1] > ai[i]) nrows++;
4265       }
4266       len_si[proc] = 2*(nrows+1);
4267       len += len_si[proc];
4268     }
4269   }
4270 
4271   /* determine the number and length of messages to receive for ij-structure */
4272   /*-------------------------------------------------------------------------*/
4273   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4274   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4275 
4276   /* post the Irecv of j-structure */
4277   /*-------------------------------*/
4278   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4279   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4280 
4281   /* post the Isend of j-structure */
4282   /*--------------------------------*/
4283   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4284 
4285   for (proc=0, k=0; proc<size; proc++){
4286     if (!len_s[proc]) continue;
4287     i = owners[proc];
4288     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4289     k++;
4290   }
4291 
4292   /* receives and sends of j-structure are complete */
4293   /*------------------------------------------------*/
4294   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4295   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4296 
4297   /* send and recv i-structure */
4298   /*---------------------------*/
4299   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4300   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4301 
4302   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4303   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4304   for (proc=0,k=0; proc<size; proc++){
4305     if (!len_s[proc]) continue;
4306     /* form outgoing message for i-structure:
4307          buf_si[0]:                 nrows to be sent
4308                [1:nrows]:           local row index
4309                [nrows+1:2*nrows+1]: i-structure index
4310     */
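    /* Example (added): if only local rows 2 and 5 of [proc]'s slice are
       nonempty, with 3 and 4 nonzeros respectively, then nrows = 2 and
           buf_si = {2, 2, 5, 0, 3, 7}
       i.e. the row count, the local row indices, then the CSR-style offsets
       into the accompanying j-message. */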
4311     /*-------------------------------------------*/
4312     nrows = len_si[proc]/2 - 1;
4313     buf_si_i    = buf_si + nrows+1;
4314     buf_si[0]   = nrows;
4315     buf_si_i[0] = 0;
4316     nrows = 0;
4317     for (i=owners[proc]; i<owners[proc+1]; i++){
4318       anzi = ai[i+1] - ai[i];
4319       if (anzi) {
4320         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4321         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4322         nrows++;
4323       }
4324     }
4325     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4326     k++;
4327     buf_si += len_si[proc];
4328   }
4329 
4330   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4331   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4332 
4333   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4334   for (i=0; i<merge->nrecv; i++){
4335     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4336   }
4337 
4338   ierr = PetscFree(len_si);CHKERRQ(ierr);
4339   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4340   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4341   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4342   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4343   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4344   ierr = PetscFree(status);CHKERRQ(ierr);
4345 
4346   /* compute a local seq matrix in each processor */
4347   /*----------------------------------------------*/
4348   /* allocate bi array and free space for accumulating nonzero column info */
4349   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4350   bi[0] = 0;
4351 
4352   /* create and initialize a linked list */
4353   nlnk = N+1;
4354   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4355 
4356   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4357   len  = ai[owners[rank+1]] - ai[owners[rank]];
4359   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4360   current_space = free_space;
4361 
4362   /* determine symbolic info for each local row */
4363   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4364 
4365   for (k=0; k<merge->nrecv; k++){
4366     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4367     nrows = *buf_ri_k[k];
4368     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4369     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4370   }
4371 
4372   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4373   len = 0;
4374   for (i=0;i<m;i++) {
4375     bnzi   = 0;
4376     /* add local non-zero cols of this proc's seqmat into lnk */
4377     arow   = owners[rank] + i;
4378     anzi   = ai[arow+1] - ai[arow];
4379     aj     = a->j + ai[arow];
4380     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4381     bnzi += nlnk;
4382     /* add received col data into lnk */
4383     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4384       if (i == *nextrow[k]) { /* i-th row */
4385         anzi = *(nextai[k]+1) - *nextai[k];
4386         aj   = buf_rj[k] + *nextai[k];
4387         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4388         bnzi += nlnk;
4389         nextrow[k]++; nextai[k]++;
4390       }
4391     }
4392     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4393 
4394     /* if free space is not available, make more free space */
4395     if (current_space->local_remaining<bnzi) {
4396       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4397       nspacedouble++;
4398     }
4399     /* copy data into free space, then initialize lnk */
4400     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4401     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4402 
4403     current_space->array           += bnzi;
4404     current_space->local_used      += bnzi;
4405     current_space->local_remaining -= bnzi;
4406 
4407     bi[i+1] = bi[i] + bnzi;
4408   }
4409 
4410   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4411 
4412   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4413   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4414   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4415 
4416   /* create symbolic parallel matrix B_mpi */
4417   /*---------------------------------------*/
4418   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4419   if (n==PETSC_DECIDE) {
4420     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4421   } else {
4422     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4423   }
4424   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4425   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4426   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4427 
4428   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4429   B_mpi->assembled     = PETSC_FALSE;
4430   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4431   merge->bi            = bi;
4432   merge->bj            = bj;
4433   merge->buf_ri        = buf_ri;
4434   merge->buf_rj        = buf_rj;
4435   merge->coi           = PETSC_NULL;
4436   merge->coj           = PETSC_NULL;
4437   merge->owners_co     = PETSC_NULL;
4438 
4439   /* attach the supporting struct to B_mpi for reuse */
4440   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4441   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4442   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4443   *mpimat = B_mpi;
4444 
4445   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4446   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4447   PetscFunctionReturn(0);
4448 }
4449 
4450 #undef __FUNCT__
4451 #define __FUNCT__ "MatMerge_SeqsToMPI"
4452 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4453 {
4454   PetscErrorCode   ierr;
4455 
4456   PetscFunctionBegin;
4457   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4458   if (scall == MAT_INITIAL_MATRIX){
4459     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4460   }
4461   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4462   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4463   PetscFunctionReturn(0);
4464 }
4465 
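/*
   Reuse sketch for MatMerge_SeqsToMPI() (added; illustrative only).  The
   symbolic phase runs once; the numeric phase may be repeated whenever the
   values, but not the nonzero pattern, of seqmat change:

      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... change the entries (not the structure) of seqmat ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/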
4466 #undef __FUNCT__
4467 #define __FUNCT__ "MatGetLocalMat"
4468 /*@
4469      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
4470 
4471     Not Collective
4472 
4473    Input Parameters:
4474 +    A - the matrix
4475 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4476 
4477    Output Parameter:
4478 .    A_loc - the local sequential matrix generated
4479 
4480     Level: developer
4481 
4482 @*/
4483 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4484 {
4485   PetscErrorCode  ierr;
4486   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4487   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4488   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4489   MatScalar       *aa=a->a,*ba=b->a,*cam;
4490   PetscScalar     *ca;
4491   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4492   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4493 
4494   PetscFunctionBegin;
4495   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4496   if (scall == MAT_INITIAL_MATRIX){
4497     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4498     ci[0] = 0;
4499     for (i=0; i<am; i++){
4500       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4501     }
4502     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4503     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4504     k = 0;
4505     for (i=0; i<am; i++) {
4506       ncols_o = bi[i+1] - bi[i];
4507       ncols_d = ai[i+1] - ai[i];
4508       /* off-diagonal portion of A */
4509       for (jo=0; jo<ncols_o; jo++) {
4510         col = cmap[*bj];
4511         if (col >= cstart) break;
4512         cj[k]   = col; bj++;
4513         ca[k++] = *ba++;
4514       }
4515       /* diagonal portion of A */
4516       for (j=0; j<ncols_d; j++) {
4517         cj[k]   = cstart + *aj++;
4518         ca[k++] = *aa++;
4519       }
4520       /* off-diagonal portion of A */
4521       for (j=jo; j<ncols_o; j++) {
4522         cj[k]   = cmap[*bj++];
4523         ca[k++] = *ba++;
4524       }
4525     }
4526     /* put together the new matrix */
4527     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4528     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4529     /* Since these are PETSc arrays, change flags to free them as necessary. */
4530     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4531     mat->free_a  = PETSC_TRUE;
4532     mat->free_ij = PETSC_TRUE;
4533     mat->nonew   = 0;
4534   } else if (scall == MAT_REUSE_MATRIX){
4535     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4536     ci = mat->i; cj = mat->j; cam = mat->a;
4537     for (i=0; i<am; i++) {
4538       /* off-diagonal portion of A */
4539       ncols_o = bi[i+1] - bi[i];
4540       for (jo=0; jo<ncols_o; jo++) {
4541         col = cmap[*bj];
4542         if (col >= cstart) break;
4543         *cam++ = *ba++; bj++;
4544       }
4545       /* diagonal portion of A */
4546       ncols_d = ai[i+1] - ai[i];
4547       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4548       /* off-diagonal portion of A */
4549       for (j=jo; j<ncols_o; j++) {
4550         *cam++ = *ba++; bj++;
4551       }
4552     }
4553   } else {
4554     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4555   }
4556 
4557   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4558   PetscFunctionReturn(0);
4559 }
4560 
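/*
   Usage sketch for MatGetLocalMat() (added; illustrative only):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... later, after the values (but not the pattern) of A change ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/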
4561 #undef __FUNCT__
4562 #define __FUNCT__ "MatGetLocalMatCondensed"
4563 /*@C
4564      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of A
4565 
4566     Not Collective
4567 
4568    Input Parameters:
4569 +    A - the matrix
4570 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4571 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4572 
4573    Output Parameter:
4574 .    A_loc - the local sequential matrix generated
4575 
4576     Level: developer
4577 
4578 @*/
4579 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4580 {
4581   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4582   PetscErrorCode    ierr;
4583   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4584   IS                isrowa,iscola;
4585   Mat               *aloc;
4586 
4587   PetscFunctionBegin;
4588   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4589   if (!row){
4590     start = A->rmap->rstart; end = A->rmap->rend;
4591     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4592   } else {
4593     isrowa = *row;
4594   }
4595   if (!col){
4596     start = A->cmap->rstart;
4597     cmap  = a->garray;
4598     nzA   = a->A->cmap->n;
4599     nzB   = a->B->cmap->n;
4600     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4601     ncols = 0;
4602     for (i=0; i<nzB; i++) {
4603       if (cmap[i] < start) idx[ncols++] = cmap[i];
4604       else break;
4605     }
4606     imark = i;
4607     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4608     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4609     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4610     ierr = PetscFree(idx);CHKERRQ(ierr);
4611   } else {
4612     iscola = *col;
4613   }
4614   if (scall != MAT_INITIAL_MATRIX){
4615     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4616     aloc[0] = *A_loc;
4617   }
4618   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4619   *A_loc = aloc[0];
4620   ierr = PetscFree(aloc);CHKERRQ(ierr);
4621   if (!row){
4622     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4623   }
4624   if (!col){
4625     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4626   }
4627   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4628   PetscFunctionReturn(0);
4629 }
4630 
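/*
   Illustration (added): the column selection above preserves global column
   order.  With cstart = 4, nzA = 3 local columns, and off-diagonal columns
   garray = {1,2,7}, the extracted columns are idx = {1,2,4,5,6,7}: the garray
   entries below cstart, then the diagonal block, then the remaining garray
   entries.
*/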
4631 #undef __FUNCT__
4632 #define __FUNCT__ "MatGetBrowsOfAcols"
4633 /*@C
4634     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
4635 
4636     Collective on Mat
4637 
4638    Input Parameters:
4639 +    A,B - the matrices in mpiaij format
4640 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4641 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4642 
4643    Output Parameter:
4644 +    rowb, colb - index sets of rows and columns of B to extract
4645 .    brstart - row index of B_seq at which B's own B->rmap->n local rows begin
4646 -    B_seq - the sequential matrix generated
4647 
4648     Level: developer
4649 
4650 @*/
4651 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4652 {
4653   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4654   PetscErrorCode    ierr;
4655   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4656   IS                isrowb,iscolb;
4657   Mat               *bseq;
4658 
4659   PetscFunctionBegin;
4660   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4661     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4662   }
4663   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4664 
4665   if (scall == MAT_INITIAL_MATRIX){
4666     start = A->cmap->rstart;
4667     cmap  = a->garray;
4668     nzA   = a->A->cmap->n;
4669     nzB   = a->B->cmap->n;
4670     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4671     ncols = 0;
4672     for (i=0; i<nzB; i++) {  /* row < local row index */
4673       if (cmap[i] < start) idx[ncols++] = cmap[i];
4674       else break;
4675     }
4676     imark = i;
4677     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4678     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4679     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4680     ierr = PetscFree(idx);CHKERRQ(ierr);
4681     *brstart = imark;
4682     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4683   } else {
4684     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4685     isrowb = *rowb; iscolb = *colb;
4686     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4687     bseq[0] = *B_seq;
4688   }
4689   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4690   *B_seq = bseq[0];
4691   ierr = PetscFree(bseq);CHKERRQ(ierr);
4692   if (!rowb){
4693     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4694   } else {
4695     *rowb = isrowb;
4696   }
4697   if (!colb){
4698     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4699   } else {
4700     *colb = iscolb;
4701   }
4702   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4703   PetscFunctionReturn(0);
4704 }
4705 
4706 #undef __FUNCT__
4707 #define __FUNCT__ "MatGetBrowsOfAoCols"
4708 /*@C
4709     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4710     of the OFF-DIAGONAL portion of the local A
4711 
4712     Collective on Mat
4713 
4714    Input Parameters:
4715 +    A,B - the matrices in mpiaij format
4716 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4717 .    startsj - starting points in B's sending j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4718 .    startsj_r - similar to startsj, but for the receives
4719 -    bufa_ptr - buffer for the sent matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4720 
4721    Output Parameter:
4722 .    B_oth - the sequential matrix generated
4723 
4724     Level: developer
4725 
4726 @*/
4727 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4728 {
4729   VecScatter_MPI_General *gen_to,*gen_from;
4730   PetscErrorCode         ierr;
4731   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4732   Mat_SeqAIJ             *b_oth;
4733   VecScatter             ctx=a->Mvctx;
4734   MPI_Comm               comm=((PetscObject)ctx)->comm;
4735   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4736   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4737   PetscScalar            *rvalues,*svalues;
4738   MatScalar              *b_otha,*bufa,*bufA;
4739   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4740   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4741   MPI_Status             *sstatus,rstatus;
4742   PetscMPIInt            jj;
4743   PetscInt               *cols,sbs,rbs;
4744   PetscScalar            *vals;
4745 
4746   PetscFunctionBegin;
4747   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4748     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4749   }
4750   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4751   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4752 
4753   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4754   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4755   rvalues  = gen_from->values; /* holds the length of receiving row */
4756   svalues  = gen_to->values;   /* holds the length of sending row */
4757   nrecvs   = gen_from->n;
4758   nsends   = gen_to->n;
4759 
4760   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4761   srow     = gen_to->indices;   /* local row index to be sent */
4762   sstarts  = gen_to->starts;
4763   sprocs   = gen_to->procs;
4764   sstatus  = gen_to->sstatus;
4765   sbs      = gen_to->bs;
4766   rstarts  = gen_from->starts;
4767   rprocs   = gen_from->procs;
4768   rbs      = gen_from->bs;
4769 
4770   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4771   if (scall == MAT_INITIAL_MATRIX){
4772     /* i-array */
4773     /*---------*/
4774     /*  post receives */
4775     for (i=0; i<nrecvs; i++){
4776       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4777       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4778       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4779     }
4780 
4781     /* pack the outgoing message */
4782     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4783     sstartsj[0] = 0;  rstartsj[0] = 0;
4784     len = 0; /* total length of j or a array to be sent */
4785     k = 0;
4786     for (i=0; i<nsends; i++){
4787       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4788       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4789       for (j=0; j<nrows; j++) {
4790         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4791         for (l=0; l<sbs; l++){
4792           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4793           rowlen[j*sbs+l] = ncols;
4794           len += ncols;
4795           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4796         }
4797         k++;
4798       }
4799       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4800       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4801     }
4802     /* recvs and sends of i-array are completed */
4803     i = nrecvs;
4804     while (i--) {
4805       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4806     }
4807     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4808 
4809     /* allocate buffers for sending j and a arrays */
4810     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4811     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4812 
4813     /* create i-array of B_oth */
4814     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4815     b_othi[0] = 0;
4816     len = 0; /* total length of j or a array to be received */
4817     k = 0;
4818     for (i=0; i<nrecvs; i++){
4819       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4820       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4821       for (j=0; j<nrows; j++) {
4822         b_othi[k+1] = b_othi[k] + rowlen[j];
4823         len += rowlen[j]; k++;
4824       }
4825       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4826     }
4827 
4828     /* allocate space for j and a arrays of B_oth */
4829     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4830     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4831 
4832     /* j-array */
4833     /*---------*/
4834     /*  post receives of j-array */
4835     for (i=0; i<nrecvs; i++){
4836       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4837       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4838     }
4839 
4840     /* pack the outgoing message j-array */
4841     k = 0;
4842     for (i=0; i<nsends; i++){
4843       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4844       bufJ = bufj+sstartsj[i];
4845       for (j=0; j<nrows; j++) {
4846         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4847         for (ll=0; ll<sbs; ll++){
4848           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4849           for (l=0; l<ncols; l++){
4850             *bufJ++ = cols[l];
4851           }
4852           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4853         }
4854       }
4855       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4856     }
4857 
4858     /* recvs and sends of j-array are completed */
4859     i = nrecvs;
4860     while (i--) {
4861       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4862     }
4863     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4864   } else if (scall == MAT_REUSE_MATRIX){
4865     sstartsj = *startsj;
4866     rstartsj = *startsj_r;
4867     bufa     = *bufa_ptr;
4868     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4869     b_otha   = b_oth->a;
4870   } else {
4871     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4872   }
4873 
4874   /* a-array */
4875   /*---------*/
4876   /*  post receives of a-array */
4877   for (i=0; i<nrecvs; i++){
4878     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4879     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4880   }
4881 
4882   /* pack the outgoing message a-array */
4883   k = 0;
4884   for (i=0; i<nsends; i++){
4885     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4886     bufA = bufa+sstartsj[i];
4887     for (j=0; j<nrows; j++) {
4888       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4889       for (ll=0; ll<sbs; ll++){
4890         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4891         for (l=0; l<ncols; l++){
4892           *bufA++ = vals[l];
4893         }
4894         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4895       }
4896     }
4897     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4898   }
4899   /* recvs and sends of a-array are completed */
4900   i = nrecvs;
4901   while (i--) {
4902     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4903   }
4904   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4905   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4906 
4907   if (scall == MAT_INITIAL_MATRIX){
4908     /* put together the new matrix */
4909     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4910 
4911     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4912     /* Since these are PETSc arrays, change flags to free them as necessary. */
4913     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4914     b_oth->free_a  = PETSC_TRUE;
4915     b_oth->free_ij = PETSC_TRUE;
4916     b_oth->nonew   = 0;
4917 
4918     ierr = PetscFree(bufj);CHKERRQ(ierr);
4919     if (!startsj || !bufa_ptr){
4920       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4921       ierr = PetscFree(bufa);CHKERRQ(ierr);
4922     } else {
4923       *startsj   = sstartsj;
4924       *startsj_r = rstartsj;
4925       *bufa_ptr  = bufa;
4926     }
4927   }
4928   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4929   PetscFunctionReturn(0);
4930 }
4931 
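/*
   Note (added summary): MatGetBrowsOfAoCols() reuses the communication pattern
   of A's matrix-vector scatter (a->Mvctx) and exchanges three rounds of
   messages with each neighbor: row lengths (i-structure), column indices
   (j-array), and values (a-array).  With MAT_REUSE_MATRIX only the values
   round is repeated, using the offsets saved in startsj/startsj_r and the send
   buffer saved in bufa_ptr.
*/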
4932 #undef __FUNCT__
4933 #define __FUNCT__ "MatGetCommunicationStructs"
4934 /*@C
4935   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4936 
4937   Not Collective
4938 
4939   Input Parameters:
4940 . A - The matrix in mpiaij format
4941 
4942   Output Parameter:
4943 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4944 . colmap - A map from global column index to local index into lvec
4945 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4946 
4947   Level: developer
4948 
4949 @*/
4950 #if defined (PETSC_USE_CTABLE)
4951 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4952 #else
4953 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4954 #endif
4955 {
4956   Mat_MPIAIJ *a;
4957 
4958   PetscFunctionBegin;
4959   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4960   PetscValidPointer(lvec, 2);
4961   PetscValidPointer(colmap, 3);
4962   PetscValidPointer(multScatter, 4);
4963   a = (Mat_MPIAIJ *) A->data;
4964   if (lvec) *lvec = a->lvec;
4965   if (colmap) *colmap = a->colmap;
4966   if (multScatter) *multScatter = a->Mvctx;
4967   PetscFunctionReturn(0);
4968 }
4969 
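/*
   Usage sketch (added; illustrative only; x is any vector compatible with the
   columns of A).  The returned scatter is the one MatMult() uses to gather the
   off-process entries of x into lvec:

      Vec        lvec;
      VecScatter scat;
      PetscInt   *colmap;    (a PetscTable when PETSC_USE_CTABLE is defined)
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/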
4970 EXTERN_C_BEGIN
4971 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4972 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4973 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4974 EXTERN_C_END
4975 
4976 #undef __FUNCT__
4977 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4978 /*
4979     Computes (B'*A')' since computing B*A directly is untenable
4980 
4981                n                       p                          p
4982         (              )       (              )         (                  )
4983       m (      A       )  *  n (       B      )   =   m (         C        )
4984         (              )       (              )         (                  )
4985 
4986 */
4987 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4988 {
4989   PetscErrorCode     ierr;
4990   Mat                At,Bt,Ct;
4991 
4992   PetscFunctionBegin;
4993   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4994   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4995   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4996   ierr = MatDestroy(At);CHKERRQ(ierr);
4997   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4998   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4999   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5000   PetscFunctionReturn(0);
5001 }
5002 
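/*
   Note (added): the transposes above reduce the dense*sparse product to the
   sparse*dense case, C = A*B = (B'*A')', presumably because MatMatMult() has a
   parallel implementation for an MPIAIJ first operand but not for an MPIDense
   one; the price is the extra MatTranspose() copies.
*/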
5003 #undef __FUNCT__
5004 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5005 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5006 {
5007   PetscErrorCode ierr;
5008   PetscInt       m=A->rmap->n,n=B->cmap->n;
5009   Mat            Cmat;
5010 
5011   PetscFunctionBegin;
5012   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5013   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5014   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5015   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5016   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5017   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5018   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5019   *C   = Cmat;
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 /* ----------------------------------------------------------------*/
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5026 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5027 {
5028   PetscErrorCode ierr;
5029 
5030   PetscFunctionBegin;
5031   if (scall == MAT_INITIAL_MATRIX){
5032     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5033   }
5034   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5035   PetscFunctionReturn(0);
5036 }
5037 
5038 EXTERN_C_BEGIN
5039 #if defined(PETSC_HAVE_MUMPS)
5040 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5041 #endif
5042 #if defined(PETSC_HAVE_PASTIX)
5043 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5044 #endif
5045 #if defined(PETSC_HAVE_SUPERLU_DIST)
5046 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5047 #endif
5048 #if defined(PETSC_HAVE_SPOOLES)
5049 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5050 #endif
5051 EXTERN_C_END
5052 
5053 /*MC
5054    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5055 
5056    Options Database Keys:
5057 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5058 
5059   Level: beginner
5060 
5061 .seealso: MatCreateMPIAIJ()
5062 M*/
5063 
5064 EXTERN_C_BEGIN
5065 #undef __FUNCT__
5066 #define __FUNCT__ "MatCreate_MPIAIJ"
5067 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5068 {
5069   Mat_MPIAIJ     *b;
5070   PetscErrorCode ierr;
5071   PetscMPIInt    size;
5072 
5073   PetscFunctionBegin;
5074   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5075 
5076   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5077   B->data         = (void*)b;
5078   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5079   B->rmap->bs     = 1;
5080   B->assembled    = PETSC_FALSE;
5081   B->mapping      = 0;
5082 
5083   B->insertmode   = NOT_SET_VALUES;
5084   b->size         = size;
5085   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5086 
5087   /* build cache for off array entries formed */
5088   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5089   b->donotstash  = PETSC_FALSE;
5090   b->colmap      = 0;
5091   b->garray      = 0;
5092   b->roworiented = PETSC_TRUE;
5093 
5094   /* stuff used for matrix vector multiply */
5095   b->lvec      = PETSC_NULL;
5096   b->Mvctx     = PETSC_NULL;
5097 
5098   /* stuff for MatGetRow() */
5099   b->rowindices   = 0;
5100   b->rowvalues    = 0;
5101   b->getrowactive = PETSC_FALSE;
5102 
5103 #if defined(PETSC_HAVE_SPOOLES)
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5105                                      "MatGetFactor_mpiaij_spooles",
5106                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5107 #endif
5108 #if defined(PETSC_HAVE_MUMPS)
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5110                                      "MatGetFactor_aij_mumps",
5111                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5112 #endif
5113 #if defined(PETSC_HAVE_PASTIX)
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5115 					   "MatGetFactor_mpiaij_pastix",
5116 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5117 #endif
5118 #if defined(PETSC_HAVE_SUPERLU_DIST)
5119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5120                                      "MatGetFactor_mpiaij_superlu_dist",
5121                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5122 #endif
5123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5124                                      "MatStoreValues_MPIAIJ",
5125                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5126   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5127                                      "MatRetrieveValues_MPIAIJ",
5128                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5129   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5130 				     "MatGetDiagonalBlock_MPIAIJ",
5131                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5132   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5133 				     "MatIsTranspose_MPIAIJ",
5134 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5135   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5136 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5137 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5138   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5139 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5140 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5141   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5142 				     "MatDiagonalScaleLocal_MPIAIJ",
5143 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5144   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5145                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5146                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5147   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5148                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5149                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5150   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5151                                      "MatConvert_MPIAIJ_MPISBAIJ",
5152                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5153   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5154                                      "MatMatMult_MPIDense_MPIAIJ",
5155                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5156   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5157                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5158                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5159   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5160                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5161                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5162   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5163   PetscFunctionReturn(0);
5164 }
5165 EXTERN_C_END
5166 
5167 #undef __FUNCT__
5168 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5169 /*@
5170      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5171          and "off-diagonal" part of the matrix in CSR format.
5172 
5173    Collective on MPI_Comm
5174 
5175    Input Parameters:
5176 +  comm - MPI communicator
5177 .  m - number of local rows (Cannot be PETSC_DECIDE)
5178 .  n - This value should be the same as the local size used in creating the
5179        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5180        it calculated if N is given). For square matrices n is almost always m.
5181 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5182 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5183 .   i - row indices for "diagonal" portion of matrix
5184 .   j - column indices
5185 .   a - matrix values
5186 .   oi - row indices for "off-diagonal" portion of matrix
5187 .   oj - column indices
5188 -   oa - matrix values
5189 
5190    Output Parameter:
5191 .   mat - the matrix
5192 
5193    Level: advanced
5194 
5195    Notes:
5196        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5197 
5198        The i and j indices are 0 based
5199 
5200        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5201 
5202        This sets local rows and cannot be used to set off-processor values.
5203 
5204        You cannot later use MatSetValues() to change values in this matrix.
5205 
5206 .keywords: matrix, aij, compressed row, sparse, parallel
5207 
5208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5209           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5210 @*/
5211 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5212 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5213 {
5214   PetscErrorCode ierr;
5215   Mat_MPIAIJ     *maij;
5216 
5217   PetscFunctionBegin;
5218   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5219   if (i[0]) {
5220     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5221   }
5222   if (oi[0]) {
5223     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5224   }
5225   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5226   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5227   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5228   maij = (Mat_MPIAIJ*) (*mat)->data;
5229   maij->donotstash     = PETSC_TRUE;
5230   (*mat)->preallocated = PETSC_TRUE;
5231 
5232   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5233   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5234   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5235   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5236 
5237   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5238   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5239 
5240   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5241   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5242   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5243   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5244 
5245   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5246   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5247   PetscFunctionReturn(0);
5248 }
5249 
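/*
   Worked example (added; illustrative only).  Consider the 4x4 matrix

        1 2 | 0 3
        0 4 | 5 0
        ----+-----
        6 0 | 7 8
        0 9 | 0 10

   split over two processes with two rows each.  Following the calls above, j
   is local to the diagonal block while oj holds global column indices, so
   process 0 would pass

      i  = {0,2,3};  j  = {0,1,1};  a  = {1,2,4};      (diagonal block)
      oi = {0,1,2};  oj = {3,2};    oa = {3,5};        (off-diagonal part)

   and process 1

      i  = {0,2,3};  j  = {0,1,1};  a  = {7,8,10};
      oi = {0,1,2};  oj = {0,1};    oa = {6,9};
*/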
5250 /*
5251     Special version for direct calls from Fortran
5252 */
5253 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5254 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5255 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5256 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5257 #endif
5258 
5259 /* Change these macros so can be used in void function */
5260 #undef CHKERRQ
5261 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5262 #undef SETERRQ2
5263 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5264 #undef SETERRQ
5265 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5266 
5267 EXTERN_C_BEGIN
5268 #undef __FUNCT__
5269 #define __FUNCT__ "matsetvaluesmpiaij_"
5270 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5271 {
5272   Mat             mat = *mmat;
5273   PetscInt        m = *mm, n = *mn;
5274   InsertMode      addv = *maddv;
5275   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5276   PetscScalar     value;
5277   PetscErrorCode  ierr;
5278 
5279   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5280   if (mat->insertmode == NOT_SET_VALUES) {
5281     mat->insertmode = addv;
5282   }
5283 #if defined(PETSC_USE_DEBUG)
5284   else if (mat->insertmode != addv) {
5285     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5286   }
5287 #endif
5288   {
5289   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5290   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5291   PetscTruth      roworiented = aij->roworiented;
5292 
5293   /* Some Variables required in the macro */
5294   Mat             A = aij->A;
5295   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5296   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5297   MatScalar       *aa = a->a;
5298   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5299   Mat             B = aij->B;
5300   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5301   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5302   MatScalar       *ba = b->a;
5303 
5304   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5305   PetscInt        nonew = a->nonew;
5306   MatScalar       *ap1,*ap2;
5307 
5308   PetscFunctionBegin;
5309   for (i=0; i<m; i++) {
5310     if (im[i] < 0) continue;
5311 #if defined(PETSC_USE_DEBUG)
5312     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5313 #endif
5314     if (im[i] >= rstart && im[i] < rend) {
5315       row      = im[i] - rstart;
5316       lastcol1 = -1;
5317       rp1      = aj + ai[row];
5318       ap1      = aa + ai[row];
5319       rmax1    = aimax[row];
5320       nrow1    = ailen[row];
5321       low1     = 0;
5322       high1    = nrow1;
5323       lastcol2 = -1;
5324       rp2      = bj + bi[row];
5325       ap2      = ba + bi[row];
5326       rmax2    = bimax[row];
5327       nrow2    = bilen[row];
5328       low2     = 0;
5329       high2    = nrow2;
5330 
5331       for (j=0; j<n; j++) {
5332         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5333         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5334         if (in[j] >= cstart && in[j] < cend){
5335           col = in[j] - cstart;
5336           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5337         } else if (in[j] < 0) continue;
5338 #if defined(PETSC_USE_DEBUG)
5339         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5340 #endif
5341         else {
5342           if (mat->was_assembled) {
5343             if (!aij->colmap) {
5344               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5345             }
5346 #if defined (PETSC_USE_CTABLE)
5347             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5348 	    col--;
5349 #else
5350             col = aij->colmap[in[j]] - 1;
5351 #endif
5352             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5353               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5354               col =  in[j];
5355               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5356               B = aij->B;
5357               b = (Mat_SeqAIJ*)B->data;
5358               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5359               rp2      = bj + bi[row];
5360               ap2      = ba + bi[row];
5361               rmax2    = bimax[row];
5362               nrow2    = bilen[row];
5363               low2     = 0;
5364               high2    = nrow2;
5365               bm       = aij->B->rmap->n;
5366               ba = b->a;
5367             }
5368           } else col = in[j];
5369           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5370         }
5371       }
5372     } else {
5373       if (!aij->donotstash) {
5374         if (roworiented) {
5375           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5376         } else {
5377           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5378         }
5379       }
5380     }
5381   }}
5382   PetscFunctionReturnVoid();
5383 }
5384 EXTERN_C_END
5385 
5386