xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fac7e3cdc257fbfc3a17cb177c9fb3b514c77af2)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(); horrible lack of reuse. There should be such a routine provided by each matrix type.
11 
12     Only for square matrices.
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal nonzeros in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal nonzeros in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
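/*
   A minimal usage sketch for MatDistribute_MPIAIJ() (not part of the library; the variable
   names below are hypothetical).  Rank 0 supplies the square SeqAIJ matrix gmat, every rank
   chooses its local row count m, and the result is a parallel AIJ matrix on the communicator.
*/
#if 0
  Mat            gmat = PETSC_NULL,dmat;   /* gmat is only referenced on rank 0 */
  PetscInt       m;                        /* local number of rows desired on this rank */
  PetscErrorCode ierr;

  ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
  /* with the same nonzero pattern, a later call only moves the numerical values again */
  ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
#endif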
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table cost; without it, it is not scalable (each processor
181 holds an order-N integer array) but is fast to access.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
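/*
   A sketch (not library code) of how the colmap built above is consulted: given a global
   column number gcol (hypothetical name), it yields the local column index in the
   off-diagonal B block, or a negative value when that column is not present locally.
   This mirrors the lookup performed in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
*/
#if 0
  PetscInt col;                               /* local column index in the off-diagonal block */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
  col--;                                      /* entries are stored shifted by one so 0 means "not found" */
#else
  col = aij->colmap[gcol] - 1;                /* order-N array indexed directly by the global column */
#endif
  if (col < 0) { /* gcol does not occur in the local off-diagonal part */ }
#endif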
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
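/*
   Both macros above use the same insertion strategy: a short bisection bounded by the last
   inserted column, a linear scan, and an upward shift of the later entries in the row.  The
   following self-contained sketch (a hypothetical helper, with no reallocation handling) shows
   that strategy on a single sorted row.
*/
#if 0
static void InsertIntoSortedRow(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high-low > 5) {                  /* narrow the search window by bisection */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {              /* finish with a linear scan */
    if (cols[i] > col) break;
    if (cols[i] == col) {                 /* column already present: combine with the existing value */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {         /* shift the later entries in the row up by one */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col;                          /* insert the new nonzero */
  vals[i] = value;
  (*nrow)++;
}
#endif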
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
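/*
   The value layout MatSetValuesRow_MPIAIJ() expects (a sketch reconstructed from the three
   PetscMemcpy() calls above, not a formal specification): the caller passes the nonzeros of
   one locally owned row in increasing global column order, which therefore splits as

      v: [ B entries with global column < diag | A (diagonal block) entries | remaining B entries ]
               first memcpy                        second memcpy               third memcpy
*/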
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some variables required by the macros above */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
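/*
   A caller-side sketch (hypothetical indices and values): locally owned rows are inserted
   directly into the A or B block by the routine above, while rows owned by other processes
   are placed in the stash and communicated during assembly.
*/
#if 0
  PetscInt    row = 0,cols[2] = {0,5};     /* global indices; row 0 may belong to another process */
  PetscScalar vals[2] = {1.0,2.0};

  ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);   /* off-process entries go to the stash */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          /* stashed entries are scattered to their owners */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);            /* and inserted in MatAssemblyEnd_MPIAIJ() */
#endif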
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processors are using the same insert mode (either INSERT_VALUES or ADD_VALUES) */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
468   mat->insertmode = addv; /* in case this processor had no cache */
469 
470   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
471   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
472   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
478 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
482   PetscErrorCode ierr;
483   PetscMPIInt    n;
484   PetscInt       i,j,rstart,ncols,flg;
485   PetscInt       *row,*col;
486   PetscTruth     other_disassembled;
487   PetscScalar    *val;
488   InsertMode     addv = mat->insertmode;
489 
490   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
491   PetscFunctionBegin;
492   if (!aij->donotstash) {
493     while (1) {
494       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
495       if (!flg) break;
496 
497       for (i=0; i<n;) {
498         /* Now identify the consecutive vals belonging to the same row */
499         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
500         if (j < n) ncols = j-i;
501         else       ncols = n-i;
502         /* Now assemble all these values with a single function call */
503         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
504         i = j;
505       }
506     }
507     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
508   }
509   a->compressedrow.use     = PETSC_FALSE;
510   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
511   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
512 
513   /* determine if any processor has disassembled; if so we must
514      also disassemble ourselves, in order that we may reassemble. */
515   /*
516      if nonzero structure of submatrix B cannot change then we know that
517      no processor disassembled thus we can skip this stuff
518   */
519   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
520     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
521     if (mat->was_assembled && !other_disassembled) {
522       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523     }
524   }
525   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
526     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
527   }
528   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
529   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
530   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
531   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
532 
533   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
534   aij->rowvalues = 0;
535 
536   /* used by MatAXPY() */
537   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
538   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
539 
540   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
541   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
548 {
549   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
554   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatZeroRows_MPIAIJ"
560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
561 {
562   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
563   PetscErrorCode ierr;
564   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
565   PetscInt       i,*owners = A->rmap->range;
566   PetscInt       *nprocs,j,idx,nsends,row;
567   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
568   PetscInt       *rvalues,count,base,slen,*source;
569   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
570   MPI_Comm       comm = ((PetscObject)A)->comm;
571   MPI_Request    *send_waits,*recv_waits;
572   MPI_Status     recv_status,*send_status;
573 #if defined(PETSC_DEBUG)
574   PetscTruth     found = PETSC_FALSE;
575 #endif
576 
577   PetscFunctionBegin;
578   /*  first count number of contributors to each processor */
579   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
580   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
581   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
582   j = 0;
583   for (i=0; i<N; i++) {
584     if (lastidx > (idx = rows[i])) j = 0;
585     lastidx = idx;
586     for (; j<size; j++) {
587       if (idx >= owners[j] && idx < owners[j+1]) {
588         nprocs[2*j]++;
589         nprocs[2*j+1] = 1;
590         owner[i] = j;
591 #if defined(PETSC_DEBUG)
592         found = PETSC_TRUE;
593 #endif
594         break;
595       }
596     }
597 #if defined(PETSC_DEBUG)
598     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
599     found = PETSC_FALSE;
600 #endif
601   }
602   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
603 
604   /* inform other processors of number of messages and max length*/
605   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
606 
607   /* post receives:   */
608   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
609   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
610   for (i=0; i<nrecvs; i++) {
611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
612   }
613 
614   /* do sends:
615       1) starts[i] gives the starting index in svalues for stuff going to
616          the ith processor
617   */
618   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
619   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
620   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
621   starts[0] = 0;
622   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
623   for (i=0; i<N; i++) {
624     svalues[starts[owner[i]]++] = rows[i];
625   }
626 
627   starts[0] = 0;
628   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
629   count = 0;
630   for (i=0; i<size; i++) {
631     if (nprocs[2*i+1]) {
632       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
633     }
634   }
635   ierr = PetscFree(starts);CHKERRQ(ierr);
636 
637   base = owners[rank];
638 
639   /*  wait on receives */
640   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
641   count  = nrecvs; slen = 0;
642   while (count) {
643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
644     /* unpack receives into our local space */
645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
646     source[imdex]  = recv_status.MPI_SOURCE;
647     lens[imdex]    = n;
648     slen          += n;
649     count--;
650   }
651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
652 
653   /* move the data into the send scatter */
654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
655   count = 0;
656   for (i=0; i<nrecvs; i++) {
657     values = rvalues + i*nmax;
658     for (j=0; j<lens[i]; j++) {
659       lrows[count++] = values[j] - base;
660     }
661   }
662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
663   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
664   ierr = PetscFree(owner);CHKERRQ(ierr);
665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
666 
667   /* actually zap the local rows */
668   /*
669         Zero the required rows. If the "diagonal block" of the matrix
670      is square and the user wishes to set the diagonal, we use separate
671      code so that MatSetValues() is not called for each diagonal entry,
672      which would allocate new memory and hence trigger many mallocs and slow things down.
673 
674   */
675   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
676   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
677   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
678     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
679   } else if (diag != 0.0) {
680     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
681     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
682       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
684     }
685     for (i = 0; i < slen; i++) {
686       row  = lrows[i] + rstart;
687       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
688     }
689     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
690     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691   } else {
692     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
693   }
694   ierr = PetscFree(lrows);CHKERRQ(ierr);
695 
696   /* wait on sends */
697   if (nsends) {
698     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
699     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
700     ierr = PetscFree(send_status);CHKERRQ(ierr);
701   }
702   ierr = PetscFree(send_waits);CHKERRQ(ierr);
703   ierr = PetscFree(svalues);CHKERRQ(ierr);
704 
705   PetscFunctionReturn(0);
706 }
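/*
   Caller-side sketch (hypothetical row list): each process may pass any global row numbers,
   owned or not; the routine above routes them to their owners before zeroing the rows and
   optionally placing diag on the diagonal.
*/
#if 0
  PetscInt    rows[2] = {0,7};             /* global rows, not necessarily locally owned */
  PetscScalar diag    = 1.0;               /* value to place on the diagonal of the zeroed rows */

  ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
#endif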
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatMult_MPIAIJ"
710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
711 {
712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
713   PetscErrorCode ierr;
714   PetscInt       nt;
715 
716   PetscFunctionBegin;
717   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
718   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
719   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
721   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
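/*
   Per process the multiply above computes  y = A_local * x_local + B_off * x_ghost,  overlapping
   the scatter of the ghost values with the local product.  A minimal usage sketch (hypothetical
   vector names, vectors obtained from the matrix layout):
*/
#if 0
  Vec x,y;

  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);   /* x follows the column layout of A, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);        /* dispatches to MatMult_MPIAIJ() above */
#endif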
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
729 {
730   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732 
733   PetscFunctionBegin;
734   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatMultAdd_MPIAIJ"
740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
747   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
748   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
756 {
757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
758   PetscErrorCode ierr;
759   PetscTruth     merged;
760 
761   PetscFunctionBegin;
762   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
763   /* do nondiagonal part */
764   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
765   if (!merged) {
766     /* send it on its way */
767     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
768     /* do local part */
769     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
770     /* receive remote parts: note this assumes the values are not actually */
771     /* added into yy until the VecScatterEnd() on the next line */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   } else {
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* send it on its way */
777     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778     /* the values were actually received in the Begin() but we still need to call this (here it is a no-op) */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 EXTERN_C_BEGIN
785 #undef __FUNCT__
786 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
788 {
789   MPI_Comm       comm;
790   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
791   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
792   IS             Me,Notme;
793   PetscErrorCode ierr;
794   PetscInt       M,N,first,last,*notme,i;
795   PetscMPIInt    size;
796 
797   PetscFunctionBegin;
798 
799   /* Easy test: symmetric diagonal block */
800   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
801   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
802   if (!*f) PetscFunctionReturn(0);
803   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
805   if (size == 1) PetscFunctionReturn(0);
806 
807   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
808   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
809   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
810   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
811   for (i=0; i<first; i++) notme[i] = i;
812   for (i=last; i<M; i++) notme[i-last+first] = i;
813   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
814   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
815   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
816   Aoff = Aoffs[0];
817   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
818   Boff = Boffs[0];
819   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
820   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
821   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
822   ierr = ISDestroy(Me);CHKERRQ(ierr);
823   ierr = ISDestroy(Notme);CHKERRQ(ierr);
824 
825   PetscFunctionReturn(0);
826 }
827 EXTERN_C_END
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
832 {
833   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   /* do nondiagonal part */
838   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
839   /* send it on its way */
840   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
841   /* do local part */
842   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
843   /* receive remote parts */
844   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849   This only works correctly for square matrices where the subblock A->A is the
850    diagonal block
851 */
852 #undef __FUNCT__
853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
855 {
856   PetscErrorCode ierr;
857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
858 
859   PetscFunctionBegin;
860   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
861   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
862   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatScale_MPIAIJ"
868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
869 {
870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
875   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatDestroy_MPIAIJ"
881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
882 {
883   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887 #if defined(PETSC_USE_LOG)
888   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
889 #endif
890   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
891   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
892   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
893   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
894 #if defined (PETSC_USE_CTABLE)
895   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
896 #else
897   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
898 #endif
899   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
900   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
901   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
902   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
903   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
904   ierr = PetscFree(aij);CHKERRQ(ierr);
905 
906   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatView_MPIAIJ_Binary"
920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
921 {
922   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
923   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
924   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
925   PetscErrorCode    ierr;
926   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
927   int               fd;
928   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
929   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
930   PetscScalar       *column_values;
931 
932   PetscFunctionBegin;
933   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
934   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
935   nz   = A->nz + B->nz;
936   if (!rank) {
937     header[0] = MAT_FILE_CLASSID;
938     header[1] = mat->rmap->N;
939     header[2] = mat->cmap->N;
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
941     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
942     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     /* get largest number of rows any processor has */
944     rlen = mat->rmap->n;
945     range = mat->rmap->range;
946     for (i=1; i<size; i++) {
947       rlen = PetscMax(rlen,range[i+1] - range[i]);
948     }
949   } else {
950     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     rlen = mat->rmap->n;
952   }
953 
954   /* load up the local row counts */
955   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
956   for (i=0; i<mat->rmap->n; i++) {
957     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
958   }
959 
960   /* store the row lengths to the file */
961   if (!rank) {
962     MPI_Status status;
963     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
964     for (i=1; i<size; i++) {
965       rlen = range[i+1] - range[i];
966       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
967       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     }
969   } else {
970     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
971   }
972   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
973 
974   /* load up the local column indices */
975   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
976   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
978   cnt  = 0;
979   for (i=0; i<mat->rmap->n; i++) {
980     for (j=B->i[i]; j<B->i[i+1]; j++) {
981       if ( (col = garray[B->j[j]]) > cstart) break;
982       column_indices[cnt++] = col;
983     }
984     for (k=A->i[i]; k<A->i[i+1]; k++) {
985       column_indices[cnt++] = A->j[k] + cstart;
986     }
987     for (; j<B->i[i+1]; j++) {
988       column_indices[cnt++] = garray[B->j[j]];
989     }
990   }
991   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
992 
993   /* store the column indices to the file */
994   if (!rank) {
995     MPI_Status status;
996     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     for (i=1; i<size; i++) {
998       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
999       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1000       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1001       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1002     }
1003   } else {
1004     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1005     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1008 
1009   /* load up the local column values */
1010   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1011   cnt  = 0;
1012   for (i=0; i<mat->rmap->n; i++) {
1013     for (j=B->i[i]; j<B->i[i+1]; j++) {
1014       if ( garray[B->j[j]] > cstart) break;
1015       column_values[cnt++] = B->a[j];
1016     }
1017     for (k=A->i[i]; k<A->i[i+1]; k++) {
1018       column_values[cnt++] = A->a[k];
1019     }
1020     for (; j<B->i[i+1]; j++) {
1021       column_values[cnt++] = B->a[j];
1022     }
1023   }
1024   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1025 
1026   /* store the column values to the file */
1027   if (!rank) {
1028     MPI_Status status;
1029     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1030     for (i=1; i<size; i++) {
1031       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1032       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1033       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1034       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1035     }
1036   } else {
1037     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1038     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1039   }
1040   ierr = PetscFree(column_values);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
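/*
   For reference, the stream written above (a sketch reconstructed from the code, not a formal
   file-format specification):

      PetscInt    header[4]          = {MAT_FILE_CLASSID, M, N, global number of nonzeros}
      PetscInt    row_lengths[M]       nonzeros per row, in global row order
      PetscInt    column_indices[nz]   global column numbers, row by row, ascending within each row
      PetscScalar values[nz]           numerical values matching column_indices

   Process 0 interleaves its own data with the data received from the other processes so that
   the file ends up in global row order.
*/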
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1047 {
1048   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1049   PetscErrorCode    ierr;
1050   PetscMPIInt       rank = aij->rank,size = aij->size;
1051   PetscTruth        isdraw,iascii,isbinary;
1052   PetscViewer       sviewer;
1053   PetscViewerFormat format;
1054 
1055   PetscFunctionBegin;
1056   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1059   if (iascii) {
1060     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1061     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1062       MatInfo    info;
1063       PetscTruth inodes;
1064 
1065       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1066       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1067       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1068       if (!inodes) {
1069         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1070 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1071       } else {
1072         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1073 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1074       }
1075       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1076       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1077       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1078       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1079       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1081       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1082       PetscFunctionReturn(0);
1083     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1084       PetscInt   inodecount,inodelimit,*inodes;
1085       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1086       if (inodes) {
1087         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1088       } else {
1089         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1090       }
1091       PetscFunctionReturn(0);
1092     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093       PetscFunctionReturn(0);
1094     }
1095   } else if (isbinary) {
1096     if (size == 1) {
1097       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1098       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1099     } else {
1100       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1101     }
1102     PetscFunctionReturn(0);
1103   } else if (isdraw) {
1104     PetscDraw  draw;
1105     PetscTruth isnull;
1106     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1107     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1108   }
1109 
1110   if (size == 1) {
1111     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1112     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1113   } else {
1114     /* assemble the entire matrix onto first processor. */
1115     Mat         A;
1116     Mat_SeqAIJ  *Aloc;
1117     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1118     MatScalar   *a;
1119 
1120     if (mat->rmap->N > 1024) {
1121       PetscTruth flg = PETSC_FALSE;
1122 
1123       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1124       if (!flg) {
1125         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1126       }
1127     }
1128 
1129     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1130     if (!rank) {
1131       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1132     } else {
1133       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1134     }
1135     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1136     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1137     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1138     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1139 
1140     /* copy over the A part */
1141     Aloc = (Mat_SeqAIJ*)aij->A->data;
1142     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143     row = mat->rmap->rstart;
1144     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1148     }
1149     aj = Aloc->j;
1150     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1151 
1152     /* copy over the B part */
1153     Aloc = (Mat_SeqAIJ*)aij->B->data;
1154     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1155     row  = mat->rmap->rstart;
1156     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1157     ct   = cols;
1158     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1159     for (i=0; i<m; i++) {
1160       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1161       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1162     }
1163     ierr = PetscFree(ct);CHKERRQ(ierr);
1164     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1165     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1166     /*
1167        Everyone has to participate in this call to draw the matrix since the graphics waits are
1168        synchronized across all processors that share the PetscDraw object
1169     */
1170     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171     if (!rank) {
1172       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1173       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1176     ierr = MatDestroy(A);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatView_MPIAIJ"
1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1184 {
1185   PetscErrorCode ierr;
1186   PetscTruth     iascii,isdraw,issocket,isbinary;
1187 
1188   PetscFunctionBegin;
1189   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1190   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1193   if (iascii || isdraw || isbinary || issocket) {
1194     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1195   } else {
1196     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
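  /*
     Strategy: gather the (possibly sequential) row permutation onto the matrix
     communicator, invert it to obtain the target row for every global row, keep
     only the entries for the locally owned rows, invert the column permutation
     locally, and finally extract the permuted matrix with
     MatGetSubMatrix_MPIAIJ_Private().
  */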
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
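  /*
     The local counts from the diagonal (A) and off-diagonal (B) blocks are summed
     into isend[]; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM these sums are then reduced
     over the matrix communicator with MPI_MAX / MPI_SUM respectively.
  */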
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SYMMETRIC:
1442     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1443     break;
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1446     break;
1447   case MAT_HERMITIAN:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetRow_MPIAIJ"
1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1465   PetscErrorCode ierr;
1466   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1467   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1468   PetscInt       *cmap,*idx_p;
1469 
1470   PetscFunctionBegin;
1471   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1472   mat->getrowactive = PETSC_TRUE;
1473 
1474   if (!mat->rowvalues && (idx || v)) {
1475     /*
1476         allocate enough space to hold information from the longest row.
1477     */
1478     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1479     PetscInt   max = 1,tmp;
1480     for (i=0; i<matin->rmap->n; i++) {
1481       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1482       if (max < tmp) { max = tmp; }
1483     }
1484     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1485   }
1486 
1487   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1488   lrow = row - rstart;
1489 
1490   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1491   if (!v)   {pvA = 0; pvB = 0;}
1492   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1493   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1494   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1495   nztot = nzA + nzB;
1496 
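  /*
     Merge the two partial rows into a single row with global column numbers:
     off-diagonal (B) columns are translated through mat->garray, and since both
     partial rows are already sorted, the B entries with global column < cstart are
     placed first, then all A entries (offset by cstart), then the remaining B entries.
  */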
1497   cmap  = mat->garray;
1498   if (v  || idx) {
1499     if (nztot) {
1500       /* Sort by increasing column numbers, assuming A and B already sorted */
1501       PetscInt imark = -1;
1502       if (v) {
1503         *v = v_p = mat->rowvalues;
1504         for (i=0; i<nzB; i++) {
1505           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1506           else break;
1507         }
1508         imark = i;
1509         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1510         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1511       }
1512       if (idx) {
1513         *idx = idx_p = mat->rowindices;
1514         if (imark > -1) {
1515           for (i=0; i<imark; i++) {
1516             idx_p[i] = cmap[cworkB[i]];
1517           }
1518         } else {
1519           for (i=0; i<nzB; i++) {
1520             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1521             else break;
1522           }
1523           imark = i;
1524         }
1525         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1526         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1527       }
1528     } else {
1529       if (idx) *idx = 0;
1530       if (v)   *v   = 0;
1531     }
1532   }
1533   *nz = nztot;
1534   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1535   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1542 {
1543   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1544 
1545   PetscFunctionBegin;
1546   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1547   aij->getrowactive = PETSC_FALSE;
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatNorm_MPIAIJ"
1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1554 {
1555   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1556   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1557   PetscErrorCode ierr;
1558   PetscInt       i,j,cstart = mat->cmap->rstart;
1559   PetscReal      sum = 0.0;
1560   MatScalar      *v;
1561 
1562   PetscFunctionBegin;
1563   if (aij->size == 1) {
1564     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1565   } else {
1566     if (type == NORM_FROBENIUS) {
1567       v = amat->a;
1568       for (i=0; i<amat->nz; i++) {
1569 #if defined(PETSC_USE_COMPLEX)
1570         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1571 #else
1572         sum += (*v)*(*v); v++;
1573 #endif
1574       }
1575       v = bmat->a;
1576       for (i=0; i<bmat->nz; i++) {
1577 #if defined(PETSC_USE_COMPLEX)
1578         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1579 #else
1580         sum += (*v)*(*v); v++;
1581 #endif
1582       }
1583       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1584       *norm = sqrt(*norm);
1585     } else if (type == NORM_1) { /* max column norm */
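      /*
         Each process accumulates |a_ij| into a global-length array of column sums
         (off-diagonal columns are mapped through garray); the arrays are summed
         across processes with MPI_Allreduce and the largest column sum is the norm.
      */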
1586       PetscReal *tmp,*tmp2;
1587       PetscInt  *jj,*garray = aij->garray;
1588       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1589       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1590       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1591       *norm = 0.0;
1592       v = amat->a; jj = amat->j;
1593       for (j=0; j<amat->nz; j++) {
1594         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1595       }
1596       v = bmat->a; jj = bmat->j;
1597       for (j=0; j<bmat->nz; j++) {
1598         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1599       }
1600       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       for (j=0; j<mat->cmap->N; j++) {
1602         if (tmp2[j] > *norm) *norm = tmp2[j];
1603       }
1604       ierr = PetscFree(tmp);CHKERRQ(ierr);
1605       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1606     } else if (type == NORM_INFINITY) { /* max row norm */
1607       PetscReal ntemp = 0.0;
1608       for (j=0; j<aij->A->rmap->n; j++) {
1609         v = amat->a + amat->i[j];
1610         sum = 0.0;
1611         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1612           sum += PetscAbsScalar(*v); v++;
1613         }
1614         v = bmat->a + bmat->i[j];
1615         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         if (sum > ntemp) ntemp = sum;
1619       }
1620       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1621     } else {
1622       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1623     }
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIAIJ"
1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1633   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1634   PetscErrorCode ierr;
1635   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1636   PetscInt       cstart=A->cmap->rstart,ncol;
1637   Mat            B;
1638   MatScalar      *array;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642 
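  /*
     The transpose is built by inserting each locally owned row of A (and of B,
     with columns mapped to global numbering through a->garray) as a column of the
     result via MatSetValues(); preallocation uses the column counts of the local
     diagonal block as an estimate for both the diagonal and off-diagonal parts.
  */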
1643   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1644   ai = Aloc->i; aj = Aloc->j;
1645   bi = Bloc->i; bj = Bloc->j;
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1648     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1649     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1650     for (i=0; i<ai[ma]; i++){
1651       d_nnz[aj[i]] ++;
1652       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1653     }
1654 
1655     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1656     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1657     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1658     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1659     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1660   } else {
1661     B = *matout;
1662   }
1663 
1664   /* copy over the A part */
1665   array = Aloc->a;
1666   row = A->rmap->rstart;
1667   for (i=0; i<ma; i++) {
1668     ncol = ai[i+1]-ai[i];
1669     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1670     row++; array += ncol; aj += ncol;
1671   }
1672   aj = Aloc->j;
1673   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1674 
1675   /* copy over the B part */
1676   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1677   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1678   array = Bloc->a;
1679   row = A->rmap->rstart;
1680   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1681   cols_tmp = cols;
1682   for (i=0; i<mb; i++) {
1683     ncol = bi[i+1]-bi[i];
1684     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1685     row++; array += ncol; cols_tmp += ncol;
1686   }
1687   ierr = PetscFree(cols);CHKERRQ(ierr);
1688 
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1692     *matout = B;
1693   } else {
1694     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1702 {
1703   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1704   Mat            a = aij->A,b = aij->B;
1705   PetscErrorCode ierr;
1706   PetscInt       s1,s2,s3;
1707 
1708   PetscFunctionBegin;
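  /*
     Left scaling by ll touches only locally owned rows, so it is applied directly
     to both the diagonal (a) and off-diagonal (b) blocks. Right scaling by rr
     needs the ghost entries of rr for the off-diagonal block, so the scatter into
     aij->lvec is started first and completed after the diagonal block is scaled.
  */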
1709   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1710   if (rr) {
1711     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1712     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1713     /* Overlap communication with computation. */
1714     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715   }
1716   if (ll) {
1717     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1718     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1719     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1720   }
1721   /* scale the diagonal block */
1722   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1723 
1724   if (rr) {
1725     /* Do a scatter end and then right scale the off-diagonal block */
1726     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1728   }
1729 
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1736 {
1737   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1742   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1743   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1744   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1750 {
1751   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatEqual_MPIAIJ"
1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1762 {
1763   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1764   Mat            a,b,c,d;
1765   PetscTruth     flg;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A; b = matA->B;
1770   c = matB->A; d = matB->B;
1771 
1772   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1773   if (flg) {
1774     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1775   }
1776   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatCopy_MPIAIJ"
1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1783 {
1784   PetscErrorCode ierr;
1785   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1786   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1787 
1788   PetscFunctionBegin;
1789   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1790   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1791     /* because of the column compression in the off-processor part of the matrix a->B,
1792        the number of columns in a->B and b->B may be different, hence we cannot call
1793        the MatCopy() directly on the two parts. If need be, we can provide a more
1794        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1795        then copying the submatrices */
1796     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1799     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatAXPY_MPIAIJ"
1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1818 {
1819   PetscErrorCode ierr;
1820   PetscInt       i;
1821   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1822   PetscBLASInt   bnz,one=1;
1823   Mat_SeqAIJ     *x,*y;
1824 
1825   PetscFunctionBegin;
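  /*
     SAME_NONZERO_PATTERN: the value arrays of X and Y line up entry for entry, so
     a single BLAS axpy per block suffices. SUBSET_NONZERO_PATTERN: the diagonal
     blocks are handled by MatAXPY_SeqAIJ(), while for the off-diagonal blocks an
     index map xtoy from X's nonzeros into Y's is built once (and cached on Y's
     off-diagonal block) so the update can be applied without changing Y's structure.
  */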
1826   if (str == SAME_NONZERO_PATTERN) {
1827     PetscScalar alpha = a;
1828     x = (Mat_SeqAIJ *)xx->A->data;
1829     y = (Mat_SeqAIJ *)yy->A->data;
1830     bnz = PetscBLASIntCast(x->nz);
1831     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1832     x = (Mat_SeqAIJ *)xx->B->data;
1833     y = (Mat_SeqAIJ *)yy->B->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836   } else if (str == SUBSET_NONZERO_PATTERN) {
1837     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1838 
1839     x = (Mat_SeqAIJ *)xx->B->data;
1840     y = (Mat_SeqAIJ *)yy->B->data;
1841     if (y->xtoy && y->XtoY != xx->B) {
1842       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1843       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1844     }
1845     if (!y->xtoy) { /* get xtoy */
1846       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1847       y->XtoY = xx->B;
1848       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1849     }
1850     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1851   } else {
1852     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatConjugate_MPIAIJ"
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1862 {
1863 #if defined(PETSC_USE_COMPLEX)
1864   PetscErrorCode ierr;
1865   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1866 
1867   PetscFunctionBegin;
1868   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1869   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1870 #else
1871   PetscFunctionBegin;
1872 #endif
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatRealPart_MPIAIJ"
1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1879 {
1880   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1881   PetscErrorCode ierr;
1882 
1883   PetscFunctionBegin;
1884   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1885   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1892 {
1893   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1898   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #ifdef PETSC_HAVE_PBGL
1903 
1904 #include <boost/parallel/mpi/bsp_process_group.hpp>
1905 #include <boost/graph/distributed/ilu_default_graph.hpp>
1906 #include <boost/graph/distributed/ilu_0_block.hpp>
1907 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1908 #include <boost/graph/distributed/petsc/interface.hpp>
1909 #include <boost/multi_array.hpp>
1910 #include <boost/parallel/distributed_property_map.hpp>
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1914 /*
1915   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1916 */
1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1918 {
1919   namespace petsc = boost::distributed::petsc;
1920 
1921   namespace graph_dist = boost::graph::distributed;
1922   using boost::graph::distributed::ilu_default::process_group_type;
1923   using boost::graph::ilu_permuted;
1924 
1925   PetscTruth      row_identity, col_identity;
1926   PetscContainer  c;
1927   PetscInt        m, n, M, N;
1928   PetscErrorCode  ierr;
1929 
1930   PetscFunctionBegin;
1931   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1932   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1933   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1934   if (!row_identity || !col_identity) {
1935     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1936   }
1937 
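  /*
     The matrix is read into a distributed Boost graph, a level-0 ILU is computed
     on that graph with ilu_permuted(), and the resulting factored graph is stashed
     in a PetscContainer composed with the factor matrix so that MatSolve_MPIAIJ()
     can retrieve and apply it.
  */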
1938   process_group_type pg;
1939   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1940   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1941   lgraph_type&   level_graph = *lgraph_p;
1942   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1943 
1944   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1945   ilu_permuted(level_graph);
1946 
1947   /* put together the new matrix */
1948   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1949   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1950   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1951   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1952   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1953   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955 
1956   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1957   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1958   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1965 {
1966   PetscFunctionBegin;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatSolve_MPIAIJ"
1972 /*
1973   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1974 */
1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1976 {
1977   namespace graph_dist = boost::graph::distributed;
1978 
1979   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1980   lgraph_type*   lgraph_p;
1981   PetscContainer c;
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1986   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1987   ierr = VecCopy(b, x);CHKERRQ(ierr);
1988 
1989   PetscScalar* array_x;
1990   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1991   PetscInt sx;
1992   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1993 
1994   PetscScalar* array_b;
1995   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1996   PetscInt sb;
1997   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1998 
1999   lgraph_type&   level_graph = *lgraph_p;
2000   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2001 
2002   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2003   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2004                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2005 
2006   typedef boost::iterator_property_map<array_ref_type::iterator,
2007                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2008   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2009                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2010 
2011   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2012 
2013   PetscFunctionReturn(0);
2014 }
2015 #endif
2016 
2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2018   PetscInt       nzlocal,nsends,nrecvs;
2019   PetscMPIInt    *send_rank,*recv_rank;
2020   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2021   PetscScalar    *sbuf_a,**rbuf_a;
2022   PetscErrorCode (*MatDestroy)(Mat);
2023 } Mat_Redundant;
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2028 {
2029   PetscErrorCode       ierr;
2030   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2031   PetscInt             i;
2032 
2033   PetscFunctionBegin;
2034   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2035   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2036   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2037   for (i=0; i<redund->nrecvs; i++){
2038     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2039     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2040   }
2041   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2042   ierr = PetscFree(redund);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDestroy_MatRedundant"
2048 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2049 {
2050   PetscErrorCode  ierr;
2051   PetscContainer  container;
2052   Mat_Redundant   *redund=PETSC_NULL;
2053 
2054   PetscFunctionBegin;
2055   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056   if (container) {
2057     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058   } else {
2059     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2060   }
2061   A->ops->destroy = redund->MatDestroy;
2062   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2063   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2064   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2071 {
2072   PetscMPIInt    rank,size;
2073   MPI_Comm       comm=((PetscObject)mat)->comm;
2074   PetscErrorCode ierr;
2075   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2076   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2077   PetscInt       *rowrange=mat->rmap->range;
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            A=aij->A,B=aij->B,C=*matredundant;
2080   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2081   PetscScalar    *sbuf_a;
2082   PetscInt       nzlocal=a->nz+b->nz;
2083   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2084   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2085   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2086   MatScalar      *aworkA,*aworkB;
2087   PetscScalar    *vals;
2088   PetscMPIInt    tag1,tag2,tag3,imdex;
2089   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2090                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2091   MPI_Status     recv_status,*send_status;
2092   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2093   PetscInt       **rbuf_j=PETSC_NULL;
2094   PetscScalar    **rbuf_a=PETSC_NULL;
2095   Mat_Redundant  *redund=PETSC_NULL;
2096   PetscContainer container;
2097 
2098   PetscFunctionBegin;
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
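  /*
     Overall scheme: every process packs its local rows (global column indices and
     values) into send buffers, determines which ranks in the other subcommunicators
     need them, exchanges buffer sizes, column indices and values with nonblocking
     point-to-point messages, and then each subcommunicator assembles its own
     complete copy of the matrix with MatSetValues(). The buffers and rank lists are
     kept in a Mat_Redundant struct attached to the result so that MAT_REUSE_MATRIX
     only has to resend the numerical values.
  */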
2102   if (reuse == MAT_REUSE_MATRIX) {
2103     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2104     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2105     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2107     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2108     if (container) {
2109       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2110     } else {
2111       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2112     }
2113     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2114 
2115     nsends    = redund->nsends;
2116     nrecvs    = redund->nrecvs;
2117     send_rank = redund->send_rank;
2118     recv_rank = redund->recv_rank;
2119     sbuf_nz   = redund->sbuf_nz;
2120     rbuf_nz   = redund->rbuf_nz;
2121     sbuf_j    = redund->sbuf_j;
2122     sbuf_a    = redund->sbuf_a;
2123     rbuf_j    = redund->rbuf_j;
2124     rbuf_a    = redund->rbuf_a;
2125   }
2126 
2127   if (reuse == MAT_INITIAL_MATRIX){
2128     PetscMPIInt  subrank,subsize;
2129     PetscInt     nleftover,np_subcomm;
2130     /* get the destination processors' id send_rank, nsends and nrecvs */
2131     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2132     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2133     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2134     np_subcomm = size/nsubcomm;
2135     nleftover  = size - nsubcomm*np_subcomm;
2136     nsends = 0; nrecvs = 0;
2137     for (i=0; i<size; i++){ /* i=rank*/
2138     for (i=0; i<size; i++){ /* i = rank */
2139         send_rank[nsends] = i; nsends++;
2140         recv_rank[nrecvs++] = i;
2141       }
2142     }
2143     if (rank >= size - nleftover){/* this proc is a leftover processor */
2144       i = size-nleftover-1;
2145       j = 0;
2146       while (j < nsubcomm - nleftover){
2147         send_rank[nsends++] = i;
2148         i--; j++;
2149       }
2150     }
2151 
2152     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2153       for (i=0; i<nleftover; i++){
2154         recv_rank[nrecvs++] = size-nleftover+i;
2155       }
2156     }
2157 
2158     /* allocate sbuf_j, sbuf_a */
2159     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2160     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2161     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2162   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2163 
2164   /* copy mat's local entries into the buffers */
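  /* Buffer layout: the first rend-rstart+1 entries of sbuf_j are CSR-style row
     pointers, followed by the global column indices of each row (off-diagonal
     columns are written in their correct sorted position via bmap); sbuf_a holds
     the matching values in the same order. */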
2165   if (reuse == MAT_INITIAL_MATRIX){
2166     rownz_max = 0;
2167     rptr = sbuf_j;
2168     cols = sbuf_j + rend-rstart + 1;
2169     vals = sbuf_a;
2170     rptr[0] = 0;
2171     for (i=0; i<rend-rstart; i++){
2172       row = i + rstart;
2173       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2174       ncols  = nzA + nzB;
2175       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2176       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2177       /* load the column indices for this row into cols */
2178       lwrite = 0;
2179       for (l=0; l<nzB; l++) {
2180         if ((ctmp = bmap[cworkB[l]]) < cstart){
2181           vals[lwrite]   = aworkB[l];
2182           cols[lwrite++] = ctmp;
2183         }
2184       }
2185       for (l=0; l<nzA; l++){
2186         vals[lwrite]   = aworkA[l];
2187         cols[lwrite++] = cstart + cworkA[l];
2188       }
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) >= cend){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       vals += ncols;
2196       cols += ncols;
2197       rptr[i+1] = rptr[i] + ncols;
2198       if (rownz_max < ncols) rownz_max = ncols;
2199     }
2200     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2201   } else { /* only copy matrix values into sbuf_a */
2202     rptr = sbuf_j;
2203     vals = sbuf_a;
2204     rptr[0] = 0;
2205     for (i=0; i<rend-rstart; i++){
2206       row = i + rstart;
2207       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2208       ncols  = nzA + nzB;
2209       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2210       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2214       }
2215       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2218       }
2219       vals += ncols;
2220       rptr[i+1] = rptr[i] + ncols;
2221     }
2222   } /* end of if/else (reuse == MAT_INITIAL_MATRIX) */
2223 
2224   /* send nzlocal to others, and recv other's nzlocal */
2225   /*--------------------------------------------------*/
2226   if (reuse == MAT_INITIAL_MATRIX){
2227     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2228     s_waits2 = s_waits3 + nsends;
2229     s_waits1 = s_waits2 + nsends;
2230     r_waits1 = s_waits1 + nsends;
2231     r_waits2 = r_waits1 + nrecvs;
2232     r_waits3 = r_waits2 + nrecvs;
2233   } else {
2234     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2235     r_waits3 = s_waits3 + nsends;
2236   }
2237 
2238   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2239   if (reuse == MAT_INITIAL_MATRIX){
2240     /* get new tags to keep the communication clean */
2241     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2242     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2243     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2244 
2245     /* post receives of other's nzlocal */
2246     for (i=0; i<nrecvs; i++){
2247       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2248     }
2249     /* send nzlocal to others */
2250     for (i=0; i<nsends; i++){
2251       sbuf_nz[i] = nzlocal;
2252       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2253     }
2254     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2255     count = nrecvs;
2256     while (count) {
2257       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2258       recv_rank[imdex] = recv_status.MPI_SOURCE;
2259       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2260       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2261 
2262       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2263       rbuf_nz[imdex] += i + 2;
2264       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2265       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2266       count--;
2267     }
2268     /* wait on sends of nzlocal */
2269     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2270     /* send mat->i,j to others, and recv from others */
2271     /*------------------------------------------------*/
2272     for (i=0; i<nsends; i++){
2273       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2274       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2275     }
2276     /* wait on receives of mat->i,j */
2277     /*------------------------------*/
2278     count = nrecvs;
2279     while (count) {
2280       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2281       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2282       count--;
2283     }
2284     /* wait on sends of mat->i,j */
2285     /*---------------------------*/
2286     if (nsends) {
2287       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2288     }
2289   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2290 
2291   /* post receives, send and receive mat->a */
2292   /*----------------------------------------*/
2293   for (imdex=0; imdex<nrecvs; imdex++) {
2294     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2295   }
2296   for (i=0; i<nsends; i++){
2297     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2298   }
2299   count = nrecvs;
2300   while (count) {
2301     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2302     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2303     count--;
2304   }
2305   if (nsends) {
2306     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2307   }
2308 
2309   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2310 
2311   /* create redundant matrix */
2312   /*-------------------------*/
2313   if (reuse == MAT_INITIAL_MATRIX){
2314     /* compute rownz_max for preallocation */
2315     for (imdex=0; imdex<nrecvs; imdex++){
2316       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2317       rptr = rbuf_j[imdex];
2318       for (i=0; i<j; i++){
2319         ncols = rptr[i+1] - rptr[i];
2320         if (rownz_max < ncols) rownz_max = ncols;
2321       }
2322     }
2323 
2324     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2325     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2326     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2327     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2328     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2329   } else {
2330     C = *matredundant;
2331   }
2332 
2333   /* insert local matrix entries */
2334   rptr = sbuf_j;
2335   cols = sbuf_j + rend-rstart + 1;
2336   vals = sbuf_a;
2337   for (i=0; i<rend-rstart; i++){
2338     row   = i + rstart;
2339     ncols = rptr[i+1] - rptr[i];
2340     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341     vals += ncols;
2342     cols += ncols;
2343   }
2344   /* insert received matrix entries */
2345   for (imdex=0; imdex<nrecvs; imdex++){
2346     rstart = rowrange[recv_rank[imdex]];
2347     rend   = rowrange[recv_rank[imdex]+1];
2348     rptr = rbuf_j[imdex];
2349     cols = rbuf_j[imdex] + rend-rstart + 1;
2350     vals = rbuf_a[imdex];
2351     for (i=0; i<rend-rstart; i++){
2352       row   = i + rstart;
2353       ncols = rptr[i+1] - rptr[i];
2354       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2355       vals += ncols;
2356       cols += ncols;
2357     }
2358   }
2359   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2362   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2363   if (reuse == MAT_INITIAL_MATRIX){
2364     PetscContainer container;
2365     *matredundant = C;
2366     /* create a supporting struct and attach it to C for reuse */
2367     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2368     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2369     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2370     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2372 
2373     redund->nzlocal = nzlocal;
2374     redund->nsends  = nsends;
2375     redund->nrecvs  = nrecvs;
2376     redund->send_rank = send_rank;
2377     redund->recv_rank = recv_rank;
2378     redund->sbuf_nz = sbuf_nz;
2379     redund->rbuf_nz = rbuf_nz;
2380     redund->sbuf_j  = sbuf_j;
2381     redund->sbuf_a  = sbuf_a;
2382     redund->rbuf_j  = rbuf_j;
2383     redund->rbuf_a  = rbuf_a;
2384 
2385     redund->MatDestroy = C->ops->destroy;
2386     C->ops->destroy    = MatDestroy_MatRedundant;
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
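  /*
     The row maxima (in absolute value) of the diagonal block are computed first,
     with the returned local column indices shifted to global numbering; the
     off-diagonal block is then processed into a temporary vector and the two are
     merged entrywise, mapping off-diagonal indices through a->garray.
  */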
2402   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->rmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++){
2418     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   if (idxb) {
2427     ierr = PetscFree(idxb);CHKERRQ(ierr);
2428   }
2429   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2438   PetscErrorCode ierr;
2439   PetscInt       i,*idxb = 0;
2440   PetscScalar    *va,*vb;
2441   Vec            vtmp;
2442 
2443   PetscFunctionBegin;
2444   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2446   if (idx) {
2447     for (i=0; i<A->rmap->n; i++) {
2448       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2449     }
2450   }
2451 
2452   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2453   if (idx) {
2454     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2455   }
2456   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2457   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2458 
2459   for (i=0; i<A->rmap->n; i++){
2460     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2461       va[i] = vb[i];
2462       if (idx) idx[i] = a->garray[idxb[i]];
2463     }
2464   }
2465 
2466   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2468   if (idxb) {
2469     ierr = PetscFree(idxb);CHKERRQ(ierr);
2470   }
2471   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2478 {
2479   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2480   PetscInt       n      = A->rmap->n;
2481   PetscInt       cstart = A->cmap->rstart;
2482   PetscInt      *cmap   = mat->garray;
2483   PetscInt      *diagIdx, *offdiagIdx;
2484   Vec            diagV, offdiagV;
2485   PetscScalar   *a, *diagA, *offdiagA;
2486   PetscInt       r;
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
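  /*
     Same merging pattern as MatGetRowMaxAbs_MPIAIJ(): the row minima of the
     diagonal and off-diagonal blocks are computed separately and merged entrywise,
     with diagonal-block indices shifted by cstart and off-diagonal indices mapped
     through garray. Note that the entrywise comparison is on absolute values.
  */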
2490   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2492   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2493   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2494   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2496   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2497   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2498   for(r = 0; r < n; ++r) {
2499     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2500       a[r]   = diagA[r];
2501       idx[r] = cstart + diagIdx[r];
2502     } else {
2503       a[r]   = offdiagA[r];
2504       idx[r] = cmap[offdiagIdx[r]];
2505     }
2506   }
2507   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2508   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2509   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2510   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2511   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2512   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2519 {
2520   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2521   PetscInt       n      = A->rmap->n;
2522   PetscInt       cstart = A->cmap->rstart;
2523   PetscInt      *cmap   = mat->garray;
2524   PetscInt      *diagIdx, *offdiagIdx;
2525   Vec            diagV, offdiagV;
2526   PetscScalar   *a, *diagA, *offdiagA;
2527   PetscInt       r;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2532   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2533   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2534   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2535   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2537   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2538   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2539   for(r = 0; r < n; ++r) {
2540     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2541       a[r]   = diagA[r];
2542       idx[r] = cstart + diagIdx[r];
2543     } else {
2544       a[r]   = offdiagA[r];
2545       idx[r] = cmap[offdiagIdx[r]];
2546     }
2547   }
2548   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2549   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2550   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2551   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2552   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2553   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2560 {
2561   PetscErrorCode ierr;
2562   Mat            *dummy;
2563 
2564   PetscFunctionBegin;
2565   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2566   *newmat = *dummy;
2567   ierr = PetscFree(dummy);CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2572 /* -------------------------------------------------------------------*/
2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2574        MatGetRow_MPIAIJ,
2575        MatRestoreRow_MPIAIJ,
2576        MatMult_MPIAIJ,
2577 /* 4*/ MatMultAdd_MPIAIJ,
2578        MatMultTranspose_MPIAIJ,
2579        MatMultTransposeAdd_MPIAIJ,
2580 #ifdef PETSC_HAVE_PBGL
2581        MatSolve_MPIAIJ,
2582 #else
2583        0,
2584 #endif
2585        0,
2586        0,
2587 /*10*/ 0,
2588        0,
2589        0,
2590        MatSOR_MPIAIJ,
2591        MatTranspose_MPIAIJ,
2592 /*15*/ MatGetInfo_MPIAIJ,
2593        MatEqual_MPIAIJ,
2594        MatGetDiagonal_MPIAIJ,
2595        MatDiagonalScale_MPIAIJ,
2596        MatNorm_MPIAIJ,
2597 /*20*/ MatAssemblyBegin_MPIAIJ,
2598        MatAssemblyEnd_MPIAIJ,
2599        MatSetOption_MPIAIJ,
2600        MatZeroEntries_MPIAIJ,
2601 /*24*/ MatZeroRows_MPIAIJ,
2602        0,
2603 #ifdef PETSC_HAVE_PBGL
2604        0,
2605 #else
2606        0,
2607 #endif
2608        0,
2609        0,
2610 /*29*/ MatSetUpPreallocation_MPIAIJ,
2611 #ifdef PETSC_HAVE_PBGL
2612        0,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618        0,
2619 /*34*/ MatDuplicate_MPIAIJ,
2620        0,
2621        0,
2622        0,
2623        0,
2624 /*39*/ MatAXPY_MPIAIJ,
2625        MatGetSubMatrices_MPIAIJ,
2626        MatIncreaseOverlap_MPIAIJ,
2627        MatGetValues_MPIAIJ,
2628        MatCopy_MPIAIJ,
2629 /*44*/ MatGetRowMax_MPIAIJ,
2630        MatScale_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634 /*49*/ MatSetBlockSize_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*54*/ MatFDColoringCreate_MPIAIJ,
2640        0,
2641        MatSetUnfactored_MPIAIJ,
2642        MatPermute_MPIAIJ,
2643        0,
2644 /*59*/ MatGetSubMatrix_MPIAIJ,
2645        MatDestroy_MPIAIJ,
2646        MatView_MPIAIJ,
2647        0,
2648        0,
2649 /*64*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655        MatGetRowMinAbs_MPIAIJ,
2656        0,
2657        MatSetColoring_MPIAIJ,
2658 #if defined(PETSC_HAVE_ADIC)
2659        MatSetValuesAdic_MPIAIJ,
2660 #else
2661        0,
2662 #endif
2663        MatSetValuesAdifor_MPIAIJ,
2664 /*75*/ MatFDColoringApply_AIJ,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*80*/ 0,
2670        0,
2671        0,
2672 /*83*/ 0,
2673        0,
2674        0,
2675        0,
2676        0,
2677        0,
2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2679        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2680        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2681        MatPtAP_Basic,
2682        MatPtAPSymbolic_MPIAIJ,
2683 /*94*/ MatPtAPNumeric_MPIAIJ,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*99*/ 0,
2689        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2690        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2691        MatConjugate_MPIAIJ,
2692        0,
2693 /*104*/MatSetValuesRow_MPIAIJ,
2694        MatRealPart_MPIAIJ,
2695        MatImaginaryPart_MPIAIJ,
2696        0,
2697        0,
2698 /*109*/0,
2699        MatGetRedundantMatrix_MPIAIJ,
2700        MatGetRowMin_MPIAIJ,
2701        0,
2702        0,
2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2704        0,
2705        0,
2706        0,
2707        0,
2708 /*119*/0,
2709        0,
2710        0,
2711        0,
2712        MatLoadnew_MPIAIJ
2713 };
2714 
2715 /* ----------------------------------------------------------------------------------------*/
2716 
2717 EXTERN_C_BEGIN
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2721 {
2722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2723   PetscErrorCode ierr;
2724 
2725   PetscFunctionBegin;
2726   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2727   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 EXTERN_C_END
2731 
2732 EXTERN_C_BEGIN
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2736 {
2737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2742   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 EXTERN_C_END
2746 
2747 #include "petscpc.h"
2748 EXTERN_C_BEGIN
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2752 {
2753   Mat_MPIAIJ     *b;
2754   PetscErrorCode ierr;
2755   PetscInt       i;
2756 
2757   PetscFunctionBegin;
2758   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2759   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2760   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2761   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2762 
2763   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   if (d_nnz) {
2768     for (i=0; i<B->rmap->n; i++) {
2769       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2770     }
2771   }
2772   if (o_nnz) {
2773     for (i=0; i<B->rmap->n; i++) {
2774       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2775     }
2776   }
2777   b = (Mat_MPIAIJ*)B->data;
2778 
2779   if (!B->preallocated) {
2780     /* Explicitly create 2 MATSEQAIJ matrices. */
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2783     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2787     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2789   }
2790 
2791   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2792   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2793   B->preallocated = PETSC_TRUE;
2794   PetscFunctionReturn(0);
2795 }
2796 EXTERN_C_END
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2801 {
2802   Mat            mat;
2803   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   *newmat       = 0;
2808   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2809   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2810   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2811   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2812   a    = (Mat_MPIAIJ*)mat->data;
2813 
2814   mat->factortype    = matin->factortype;
2815   mat->rmap->bs      = matin->rmap->bs;
2816   mat->assembled    = PETSC_TRUE;
2817   mat->insertmode   = NOT_SET_VALUES;
2818   mat->preallocated = PETSC_TRUE;
2819 
2820   a->size           = oldmat->size;
2821   a->rank           = oldmat->rank;
2822   a->donotstash     = oldmat->donotstash;
2823   a->roworiented    = oldmat->roworiented;
2824   a->rowindices     = 0;
2825   a->rowvalues      = 0;
2826   a->getrowactive   = PETSC_FALSE;
2827 
2828   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2830 
2831   if (oldmat->colmap) {
2832 #if defined (PETSC_USE_CTABLE)
2833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2834 #else
2835     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2838 #endif
2839   } else a->colmap = 0;
2840   if (oldmat->garray) {
2841     PetscInt len;
2842     len  = oldmat->B->cmap->n;
2843     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2846   } else a->garray = 0;
2847 
2848   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2850   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2852   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2854   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2856   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2857   *newmat = mat;
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatLoadnew_MPIAIJ"
2863 PetscErrorCode MatLoadnew_MPIAIJ(PetscViewer viewer, Mat newMat)
2864 {
2865   PetscScalar    *vals,*svals;
2866   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2867   MPI_Status     status;
2868   PetscErrorCode ierr;
2869   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2870   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2871   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2872   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2873   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2874   int            fd;
2875 
2876   PetscFunctionBegin;
2877   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2878   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2879   if (!rank) {
2880     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2881     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2882     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2883   }
2884 
2885   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2886 
2887   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2888   M = header[1]; N = header[2];
2889   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2890   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2891   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2892 
2893   /* If global sizes are set, check if they are consistent with that given in the file */
2894   if (sizesset) {
2895     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2896   }
2897   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2898   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2899 
2900   /* determine ownership of all rows */
2901   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2902   else m = newMat->rmap->n; /* Set by user */
2903 
2904   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2905   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2906 
2907   /* First process needs enough room for process with most rows */
2908   if (!rank) {
2909     mmax       = rowners[1];
2910     for (i=2; i<=size; i++) {
2911       mmax = PetscMax(mmax,rowners[i]);
2912     }
2913   } else mmax = m;
2914 
2915   rowners[0] = 0;
2916   for (i=2; i<=size; i++) {
2917     rowners[i] += rowners[i-1];
2918   }
2919   rstart = rowners[rank];
2920   rend   = rowners[rank+1];
2921 
2922   /* distribute row lengths to all processors */
2923   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2924   if (!rank) {
2925     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2926     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2927     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2928     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2929     for (j=0; j<m; j++) {
2930       procsnz[0] += ourlens[j];
2931     }
2932     for (i=1; i<size; i++) {
2933       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2934       /* calculate the number of nonzeros on each processor */
2935       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2936         procsnz[i] += rowlengths[j];
2937       }
2938       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2939       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2940     }
2941     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2942   } else {
2943     mpicnt = PetscMPIIntCast(m);
2944     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2945   }
2946 
2947   if (!rank) {
2948     /* determine max buffer needed and allocate it */
2949     maxnz = 0;
2950     for (i=0; i<size; i++) {
2951       maxnz = PetscMax(maxnz,procsnz[i]);
2952     }
2953     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2954 
2955     /* read in my part of the matrix column indices  */
2956     nz   = procsnz[0];
2957     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2958     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2959 
2960     /* read in everyone else's part of the matrix and ship it off */
2961     for (i=1; i<size; i++) {
2962       nz     = procsnz[i];
2963       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2964       mpicnt = PetscMPIIntCast(nz);
2965       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2966     }
2967     ierr = PetscFree(cols);CHKERRQ(ierr);
2968   } else {
2969     /* determine buffer space needed for message */
2970     nz = 0;
2971     for (i=0; i<m; i++) {
2972       nz += ourlens[i];
2973     }
2974     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2975 
2976     /* receive message of column indices*/
2977     mpicnt = PetscMPIIntCast(nz);
2978     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2979     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2980     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2981     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2982     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2983   }
2984 
2985   /* determine column ownership if matrix is not square */
2986   if (N != M) {
2987     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
2988     else n = newMat->cmap->n;
2989     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2990     cstart = cend - n;
2991   } else {
2992     cstart = rstart;
2993     cend   = rend;
2994     n      = cend - cstart;
2995   }
2996 
2997   /* loop over local rows, determining number of off diagonal entries */
2998   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2999   jj = 0;
3000   for (i=0; i<m; i++) {
3001     for (j=0; j<ourlens[i]; j++) {
3002       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3003       jj++;
3004     }
3005   }
3006 
3007   for (i=0; i<m; i++) {
3008     ourlens[i] -= offlens[i];
3009   }
3010   if (!sizesset) {
3011     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3012   }
3013   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3014 
3015   for (i=0; i<m; i++) {
3016     ourlens[i] += offlens[i];
3017   }
3018 
3019   if (!rank) {
3020     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3021 
3022     /* read in my part of the matrix numerical values  */
3023     nz   = procsnz[0];
3024     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3025 
3026     /* insert into matrix */
3027     jj      = rstart;
3028     smycols = mycols;
3029     svals   = vals;
3030     for (i=0; i<m; i++) {
3031       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3032       smycols += ourlens[i];
3033       svals   += ourlens[i];
3034       jj++;
3035     }
3036 
3037     /* read in other processors and ship out */
3038     for (i=1; i<size; i++) {
3039       nz     = procsnz[i];
3040       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3041       mpicnt = PetscMPIIntCast(nz);
3042       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3043     }
3044     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3045   } else {
3046     /* receive numeric values */
3047     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3048 
3049     /* receive message of values*/
3050     mpicnt = PetscMPIIntCast(nz);
3051     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr);
3052     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3053     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3054     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3055     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3056 
3057     /* insert into matrix */
3058     jj      = rstart;
3059     smycols = mycols;
3060     svals   = vals;
3061     for (i=0; i<m; i++) {
3062       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3063       smycols += ourlens[i];
3064       svals   += ourlens[i];
3065       jj++;
3066     }
3067   }
3068   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3069   ierr = PetscFree(vals);CHKERRQ(ierr);
3070   ierr = PetscFree(mycols);CHKERRQ(ierr);
3071   ierr = PetscFree(rowners);CHKERRQ(ierr);
3072 
3073   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3080 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3081 {
3082   PetscErrorCode ierr;
3083   IS             iscol_local;
3084   PetscInt       csize;
3085 
3086   PetscFunctionBegin;
3087   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3088   if (call == MAT_REUSE_MATRIX) {
3089     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3090     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3091   } else {
3092     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3093   }
3094   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3095   if (call == MAT_INITIAL_MATRIX) {
3096     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3097     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3098   }
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3104 /*
3105     Not great since it makes two copies of the submatrix: first a SeqAIJ
3106   locally, and then the final result by concatenating the local matrices.
3107   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3108 
3109   Note: This requires a sequential iscol with all indices.
3110 */
3111 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3112 {
3113   PetscErrorCode ierr;
3114   PetscMPIInt    rank,size;
3115   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3116   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3117   Mat            *local,M,Mreuse;
3118   MatScalar      *vwork,*aa;
3119   MPI_Comm       comm = ((PetscObject)mat)->comm;
3120   Mat_SeqAIJ     *aij;
3121 
3122 
3123   PetscFunctionBegin;
3124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3125   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3126 
3127   if (call ==  MAT_REUSE_MATRIX) {
3128     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3129     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3130     local = &Mreuse;
3131     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3132   } else {
3133     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3134     Mreuse = *local;
3135     ierr   = PetscFree(local);CHKERRQ(ierr);
3136   }
3137 
3138   /*
3139       m - number of local rows
3140       n - number of columns (same on all processors)
3141       rstart - first row in new global matrix generated
3142   */
3143   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3144   if (call == MAT_INITIAL_MATRIX) {
3145     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3146     ii  = aij->i;
3147     jj  = aij->j;
3148 
3149     /*
3150         Determine the number of non-zeros in the diagonal and off-diagonal
3151         portions of the matrix in order to do correct preallocation
3152     */
3153 
3154     /* first get start and end of "diagonal" columns */
3155     if (csize == PETSC_DECIDE) {
3156       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3157       if (mglobal == n) { /* square matrix */
3158 	nlocal = m;
3159       } else {
3160         nlocal = n/size + ((n % size) > rank);
3161       }
3162     } else {
3163       nlocal = csize;
3164     }
3165     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3166     rstart = rend - nlocal;
3167     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3168 
3169     /* next, compute all the lengths */
3170     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3171     olens = dlens + m;
3172     for (i=0; i<m; i++) {
3173       jend = ii[i+1] - ii[i];
3174       olen = 0;
3175       dlen = 0;
3176       for (j=0; j<jend; j++) {
3177         if (*jj < rstart || *jj >= rend) olen++;
3178         else dlen++;
3179         jj++;
3180       }
3181       olens[i] = olen;
3182       dlens[i] = dlen;
3183     }
3184     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3185     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3186     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3187     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3188     ierr = PetscFree(dlens);CHKERRQ(ierr);
3189   } else {
3190     PetscInt ml,nl;
3191 
3192     M = *newmat;
3193     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3194     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3195     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3196     /*
3197          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3198        rather than the slower MatSetValues().
3199     */
3200     M->was_assembled = PETSC_TRUE;
3201     M->assembled     = PETSC_FALSE;
3202   }
3203   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3204   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3205   ii  = aij->i;
3206   jj  = aij->j;
3207   aa  = aij->a;
3208   for (i=0; i<m; i++) {
3209     row   = rstart + i;
3210     nz    = ii[i+1] - ii[i];
3211     cwork = jj;     jj += nz;
3212     vwork = aa;     aa += nz;
3213     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3214   }
3215 
3216   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3218   *newmat = M;
3219 
3220   /* save submatrix used in processor for next request */
3221   if (call ==  MAT_INITIAL_MATRIX) {
3222     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3223     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3224   }
3225 
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 EXTERN_C_BEGIN
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3232 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3233 {
3234   PetscInt       m,cstart, cend,j,nnz,i,d;
3235   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3236   const PetscInt *JJ;
3237   PetscScalar    *values;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3242 
3243   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3244   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3245   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3246   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3247   m      = B->rmap->n;
3248   cstart = B->cmap->rstart;
3249   cend   = B->cmap->rend;
3250   rstart = B->rmap->rstart;
3251 
3252   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3253 
3254 #if defined(PETSC_USE_DEBUG)
3255   for (i=0; i<m; i++) {
3256     nnz     = Ii[i+1]- Ii[i];
3257     JJ      = J + Ii[i];
3258     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3259     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3260     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3261   }
3262 #endif
3263 
3264   for (i=0; i<m; i++) {
3265     nnz     = Ii[i+1]- Ii[i];
3266     JJ      = J + Ii[i];
3267     nnz_max = PetscMax(nnz_max,nnz);
3268     d       = 0;
3269     for (j=0; j<nnz; j++) {
3270       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3271     }
3272     d_nnz[i] = d;
3273     o_nnz[i] = nnz - d;
3274   }
3275   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3276   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3277 
3278   if (v) values = (PetscScalar*)v;
3279   else {
3280     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3281     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3282   }
3283 
3284   for (i=0; i<m; i++) {
3285     ii   = i + rstart;
3286     nnz  = Ii[i+1]- Ii[i];
3287     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3288   }
3289   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3291 
3292   if (!v) {
3293     ierr = PetscFree(values);CHKERRQ(ierr);
3294   }
3295   PetscFunctionReturn(0);
3296 }
3297 EXTERN_C_END
3298 
3299 #undef __FUNCT__
3300 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3301 /*@
3302    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3303    (the default parallel PETSc format).
3304 
3305    Collective on MPI_Comm
3306 
3307    Input Parameters:
3308 +  B - the matrix
3309 .  i - the indices into j for the start of each local row (starts with zero)
3310 .  j - the column indices for each local row (starts with zero)
3311 -  v - optional values in the matrix
3312 
3313    Level: developer
3314 
3315    Notes:
3316        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3317      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3318      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3319 
3320        The i and j indices are zero-based, and the i entries are offsets into the local j array.
3321 
3322        The format used for the sparse matrix input is equivalent to a
3323     row-major ordering, i.e. for the following matrix, the expected input data is
3324     as shown:
3325 
3326         1 0 0
3327         2 0 3     P0
3328        -------
3329         4 5 6     P1
3330 
3331      Process0 [P0]: rows_owned=[0,1]
3332         i =  {0,1,3}  [size = nrow+1  = 2+1]
3333         j =  {0,0,2}  [size = nz = 3]
3334         v =  {1,2,3}  [size = nz = 3]
3335 
3336      Process1 [P1]: rows_owned=[2]
3337         i =  {0,3}    [size = nrow+1  = 1+1]
3338         j =  {0,1,2}  [size = nz = 3]
3339         v =  {4,5,6}  [size = nz = 3]
3340 
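   As an illustrative usage sketch (not part of the library source), each process passes its own
   local rows; e.g. P0 above might call the routine as follows, with P1 passing its single row
   analogously. The names B, ierr, ia, ja, and va are hypothetical and error handling is elided:

.vb
     PetscInt    ia[] = {0,1,3};        // row offsets of the 2 local rows
     PetscInt    ja[] = {0,0,2};        // global column indices
     PetscScalar va[] = {1.0,2.0,3.0};  // values, row by row
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,ia,ja,va);CHKERRQ(ierr);
.ve
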
3341 .keywords: matrix, aij, compressed row, sparse, parallel
3342 
3343 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3344           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3345 @*/
3346 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3347 {
3348   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3349 
3350   PetscFunctionBegin;
3351   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3352   if (f) {
3353     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3354   }
3355   PetscFunctionReturn(0);
3356 }
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3360 /*@C
3361    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3362    (the default parallel PETSc format).  For good matrix assembly performance
3363    the user should preallocate the matrix storage by setting the parameters
3364    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3365    performance can be increased by more than a factor of 50.
3366 
3367    Collective on MPI_Comm
3368 
3369    Input Parameters:
3370 +  A - the matrix
3371 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3372            (same value is used for all local rows)
3373 .  d_nnz - array containing the number of nonzeros in the various rows of the
3374            DIAGONAL portion of the local submatrix (possibly different for each row)
3375            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3376            The size of this array is equal to the number of local rows, i.e 'm'.
3377            You must leave room for the diagonal entry even if it is zero.
3378 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3379            submatrix (same value is used for all local rows).
3380 -  o_nnz - array containing the number of nonzeros in the various rows of the
3381            OFF-DIAGONAL portion of the local submatrix (possibly different for
3382            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3383            structure. The size of this array is equal to the number
3384            of local rows, i.e 'm'.
3385 
3386    If the *_nnz parameter is given then the *_nz parameter is ignored
3387 
3388    The AIJ format (also called the Yale sparse matrix format or
3389    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3390    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3391 
3392    The parallel matrix is partitioned such that the first m0 rows belong to
3393    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3394    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3395 
3396    The DIAGONAL portion of the local submatrix of a processor can be defined
3397    as the submatrix which is obtained by extracting the part corresponding
3398    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3399    first row that belongs to the processor, and r2 is the last row belonging
3400    to this processor. This is a square mxm matrix. The remaining portion
3401    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3402 
3403    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3404 
3405    You can call MatGetInfo() to get information on how effective the preallocation was;
3406    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3407    You can also run with the option -info and look for messages with the string
3408    malloc in them to see if additional memory allocation was needed.
3409 
3410    Example usage:
3411 
3412    Consider the following 8x8 matrix with 34 non-zero values, that is
3413    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3414    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3415    as follows:
3416 
3417 .vb
3418             1  2  0  |  0  3  0  |  0  4
3419     Proc0   0  5  6  |  7  0  0  |  8  0
3420             9  0 10  | 11  0  0  | 12  0
3421     -------------------------------------
3422            13  0 14  | 15 16 17  |  0  0
3423     Proc1   0 18  0  | 19 20 21  |  0  0
3424             0  0  0  | 22 23  0  | 24  0
3425     -------------------------------------
3426     Proc2  25 26 27  |  0  0 28  | 29  0
3427            30  0  0  | 31 32 33  |  0 34
3428 .ve
3429 
3430    This can be represented as a collection of submatrices as:
3431 
3432 .vb
3433       A B C
3434       D E F
3435       G H I
3436 .ve
3437 
3438    Where the submatrices A,B,C are owned by proc0, D,E,F are
3439    owned by proc1, G,H,I are owned by proc2.
3440 
3441    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3442    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3443    The 'M','N' parameters are 8,8, and have the same values on all procs.
3444 
3445    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3446    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3447    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3448    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3449    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3450    matrix, and [DF] as another SeqAIJ matrix.
3451 
3452    When d_nz, o_nz parameters are specified, d_nz storage elements are
3453    allocated for every row of the local diagonal submatrix, and o_nz
3454    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3455    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
3456    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3457    In this case, the values of d_nz,o_nz are:
3458 .vb
3459      proc0 : d_nz = 2, o_nz = 2
3460      proc1 : d_nz = 3, o_nz = 2
3461      proc2 : d_nz = 1, o_nz = 4
3462 .ve
3463    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3464    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3465    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3466    34 values.
3467 
3468    When d_nnz, o_nnz parameters are specified, the storage is specified
3469    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3470    In the above case the values for d_nnz,o_nnz are:
3471 .vb
3472      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3473      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3474      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3475 .ve
3476    Here the space allocated is sum of all the above values i.e 34, and
3477    hence pre-allocation is perfect.
3478 
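   As a hedged sketch of what this means in code, proc0 in the example above could preallocate
   with the per-row arrays as follows (A and ierr are assumed to be declared elsewhere; error
   handling is elided):

.vb
     PetscInt d_nnz[] = {2,2,2};   // nonzeros per local row in the DIAGONAL block
     PetscInt o_nnz[] = {2,2,2};   // nonzeros per local row in the OFF-DIAGONAL block
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   proc1 and proc2 make the same collective calls with their own local sizes and arrays.
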
3479    Level: intermediate
3480 
3481 .keywords: matrix, aij, compressed row, sparse, parallel
3482 
3483 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3484           MPIAIJ, MatGetInfo()
3485 @*/
3486 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3487 {
3488   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3489 
3490   PetscFunctionBegin;
3491   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3492   if (f) {
3493     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3494   }
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3500 /*@
3501      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3502          the local rows in standard CSR format.
3503 
3504    Collective on MPI_Comm
3505 
3506    Input Parameters:
3507 +  comm - MPI communicator
3508 .  m - number of local rows (Cannot be PETSC_DECIDE)
3509 .  n - This value should be the same as the local size used in creating the
3510        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3511        calculated if N is given) For square matrices n is almost always m.
3512 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3513 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3514 .   i - row indices
3515 .   j - column indices
3516 -   a - matrix values
3517 
3518    Output Parameter:
3519 .   mat - the matrix
3520 
3521    Level: intermediate
3522 
3523    Notes:
3524        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3525      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3526      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3527 
3528        The i and j indices are zero-based, and the i entries are offsets into the local j array.
3529 
3530        The format used for the sparse matrix input is equivalent to a
3531     row-major ordering, i.e. for the following matrix, the expected input data is
3532     as shown:
3533 
3534         1 0 0
3535         2 0 3     P0
3536        -------
3537         4 5 6     P1
3538 
3539      Process0 [P0]: rows_owned=[0,1]
3540         i =  {0,1,3}  [size = nrow+1  = 2+1]
3541         j =  {0,0,2}  [size = nz = 3]
3542         v =  {1,2,3}  [size = nz = 3]
3543 
3544      Process1 [P1]: rows_owned=[2]
3545         i =  {0,3}    [size = nrow+1  = 1+1]
3546         j =  {0,1,2}  [size = nz = 3]
3547         v =  {4,5,6}  [size = nz = 3]
3548 
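   An illustrative call for P0 above (the names mat, ierr, ia, ja, and va are hypothetical and
   error handling is elided; P1 makes the analogous call with its one local row):

.vb
     PetscInt    ia[] = {0,1,3};
     PetscInt    ja[] = {0,0,2};
     PetscScalar va[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,ia,ja,va,&mat);CHKERRQ(ierr);
.ve
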
3549 .keywords: matrix, aij, compressed row, sparse, parallel
3550 
3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3552           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3553 @*/
3554 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3555 {
3556   PetscErrorCode ierr;
3557 
3558   PetscFunctionBegin;
3559   if (i[0]) {
3560     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3561   }
3562   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3563   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3564   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3565   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3566   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3567   PetscFunctionReturn(0);
3568 }
3569 
3570 #undef __FUNCT__
3571 #define __FUNCT__ "MatCreateMPIAIJ"
3572 /*@C
3573    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3574    (the default parallel PETSc format).  For good matrix assembly performance
3575    the user should preallocate the matrix storage by setting the parameters
3576    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3577    performance can be increased by more than a factor of 50.
3578 
3579    Collective on MPI_Comm
3580 
3581    Input Parameters:
3582 +  comm - MPI communicator
3583 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3584            This value should be the same as the local size used in creating the
3585            y vector for the matrix-vector product y = Ax.
3586 .  n - This value should be the same as the local size used in creating the
3587        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3588        calculated if N is given) For square matrices n is almost always m.
3589 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3590 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3591 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3592            (same value is used for all local rows)
3593 .  d_nnz - array containing the number of nonzeros in the various rows of the
3594            DIAGONAL portion of the local submatrix (possibly different for each row)
3595            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3596            The size of this array is equal to the number of local rows, i.e 'm'.
3597            You must leave room for the diagonal entry even if it is zero.
3598 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3599            submatrix (same value is used for all local rows).
3600 -  o_nnz - array containing the number of nonzeros in the various rows of the
3601            OFF-DIAGONAL portion of the local submatrix (possibly different for
3602            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3603            structure. The size of this array is equal to the number
3604            of local rows, i.e 'm'.
3605 
3606    Output Parameter:
3607 .  A - the matrix
3608 
3609    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3610    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3611    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3612 
3613    Notes:
3614    If the *_nnz parameter is given then the *_nz parameter is ignored
3615 
3616    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3617    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3618    storage requirements for this matrix.
3619 
3620    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3621    processor then it must be used on all processors that share the object for
3622    that argument.
3623 
3624    The user MUST specify either the local or global matrix dimensions
3625    (possibly both).
3626 
3627    The parallel matrix is partitioned across processors such that the
3628    first m0 rows belong to process 0, the next m1 rows belong to
3629    process 1, the next m2 rows belong to process 2 etc.. where
3630    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3631    values corresponding to an [m x N] submatrix.
3632 
3633    The columns are logically partitioned with the n0 columns belonging
3634    to 0th partition, the next n1 columns belonging to the next
3635    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3636 
3637    The DIAGONAL portion of the local submatrix on any given processor
3638    is the submatrix corresponding to the m rows and n columns
3639    owned by the given processor, i.e. the diagonal matrix on
3640    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3641    etc. The remaining portion of the local submatrix [m x (N-n)]
3642    constitutes the OFF-DIAGONAL portion. The example below better
3643    illustrates this concept.
3644 
3645    For a square global matrix we define each processor's diagonal portion
3646    to be its local rows and the corresponding columns (a square submatrix);
3647    each processor's off-diagonal portion encompasses the remainder of the
3648    local matrix (a rectangular submatrix).
3649 
3650    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3651 
3652    When calling this routine with a single process communicator, a matrix of
3653    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3654    type of communicator, use the construction mechanism:
3655      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3656 
3657    By default, this format uses inodes (identical nodes) when possible.
3658    We search for consecutive rows with the same nonzero structure, thereby
3659    reusing matrix information to achieve increased efficiency.
3660 
3661    Options Database Keys:
3662 +  -mat_no_inode  - Do not use inodes
3663 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3664 -  -mat_aij_oneindex - Internally use indexing starting at 1
3665         rather than 0.  Note that when calling MatSetValues(),
3666         the user still MUST index entries starting at 0!
3667 
3668 
3669    Example usage:
3670 
3671    Consider the following 8x8 matrix with 34 non-zero values, that is
3672    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3673    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3674    as follows:
3675 
3676 .vb
3677             1  2  0  |  0  3  0  |  0  4
3678     Proc0   0  5  6  |  7  0  0  |  8  0
3679             9  0 10  | 11  0  0  | 12  0
3680     -------------------------------------
3681            13  0 14  | 15 16 17  |  0  0
3682     Proc1   0 18  0  | 19 20 21  |  0  0
3683             0  0  0  | 22 23  0  | 24  0
3684     -------------------------------------
3685     Proc2  25 26 27  |  0  0 28  | 29  0
3686            30  0  0  | 31 32 33  |  0 34
3687 .ve
3688 
3689    This can be represented as a collection of submatrices as:
3690 
3691 .vb
3692       A B C
3693       D E F
3694       G H I
3695 .ve
3696 
3697    Where the submatrices A,B,C are owned by proc0, D,E,F are
3698    owned by proc1, G,H,I are owned by proc2.
3699 
3700    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702    The 'M','N' parameters are 8,8, and have the same values on all procs.
3703 
3704    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3705    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3706    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3707    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3708    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3709    matrix, and [DF] as another SeqAIJ matrix.
3710 
3711    When d_nz, o_nz parameters are specified, d_nz storage elements are
3712    allocated for every row of the local diagonal submatrix, and o_nz
3713    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3714    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
3715    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3716    In this case, the values of d_nz,o_nz are:
3717 .vb
3718      proc0 : d_nz = 2, o_nz = 2
3719      proc1 : d_nz = 3, o_nz = 2
3720      proc2 : d_nz = 1, o_nz = 4
3721 .ve
3722    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3723    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3724    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3725    34 values.
3726 
3727    When d_nnz, o_nnz parameters are specified, the storage is specified
3728    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3729    In the above case the values for d_nnz,o_nnz are:
3730 .vb
3731      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3732      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3733      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3734 .ve
3735    Here the space allocated is sum of all the above values i.e 34, and
3736    hence pre-allocation is perfect.
3737 
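   For instance, proc1 above could use the scalar d_nz/o_nz form with this single call (a sketch;
   A and ierr are assumed to be declared, error handling is elided, and the routine is collective,
   so proc0 and proc2 make the same call with their own local sizes and counts):

.vb
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
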
3738    Level: intermediate
3739 
3740 .keywords: matrix, aij, compressed row, sparse, parallel
3741 
3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3743           MPIAIJ, MatCreateMPIAIJWithArrays()
3744 @*/
3745 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3746 {
3747   PetscErrorCode ierr;
3748   PetscMPIInt    size;
3749 
3750   PetscFunctionBegin;
3751   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3752   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3753   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3754   if (size > 1) {
3755     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3756     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3757   } else {
3758     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3759     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3760   }
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3766 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3767 {
3768   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3769 
3770   PetscFunctionBegin;
3771   *Ad     = a->A;
3772   *Ao     = a->B;
3773   *colmap = a->garray;
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 #undef __FUNCT__
3778 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3779 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3780 {
3781   PetscErrorCode ierr;
3782   PetscInt       i;
3783   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3784 
3785   PetscFunctionBegin;
3786   if (coloring->ctype == IS_COLORING_GLOBAL) {
3787     ISColoringValue *allcolors,*colors;
3788     ISColoring      ocoloring;
3789 
3790     /* set coloring for diagonal portion */
3791     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3792 
3793     /* set coloring for off-diagonal portion */
3794     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3795     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3796     for (i=0; i<a->B->cmap->n; i++) {
3797       colors[i] = allcolors[a->garray[i]];
3798     }
3799     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3800     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3801     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3802     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3803   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3804     ISColoringValue *colors;
3805     PetscInt        *larray;
3806     ISColoring      ocoloring;
3807 
3808     /* set coloring for diagonal portion */
3809     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       larray[i] = i + A->cmap->rstart;
3812     }
3813     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3814     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3815     for (i=0; i<a->A->cmap->n; i++) {
3816       colors[i] = coloring->colors[larray[i]];
3817     }
3818     ierr = PetscFree(larray);CHKERRQ(ierr);
3819     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3820     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3821     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3822 
3823     /* set coloring for off-diagonal portion */
3824     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3825     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3826     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3827     for (i=0; i<a->B->cmap->n; i++) {
3828       colors[i] = coloring->colors[larray[i]];
3829     }
3830     ierr = PetscFree(larray);CHKERRQ(ierr);
3831     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3832     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3833     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3834   } else {
3835     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3836   }
3837 
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 #if defined(PETSC_HAVE_ADIC)
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3844 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3845 {
3846   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3847   PetscErrorCode ierr;
3848 
3849   PetscFunctionBegin;
3850   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3851   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3852   PetscFunctionReturn(0);
3853 }
3854 #endif
3855 
3856 #undef __FUNCT__
3857 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3858 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3859 {
3860   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3861   PetscErrorCode ierr;
3862 
3863   PetscFunctionBegin;
3864   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3865   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3866   PetscFunctionReturn(0);
3867 }
3868 
3869 #undef __FUNCT__
3870 #define __FUNCT__ "MatMerge"
3871 /*@
3872       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3873                  matrices from each processor
3874 
3875     Collective on MPI_Comm
3876 
3877    Input Parameters:
3878 +    comm - the communicator the parallel matrix will live on
3879 .    inmat - the input sequential matrix on each process
3880 .    n - number of local columns (or PETSC_DECIDE)
3881 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3882 
3883    Output Parameter:
3884 .    outmat - the parallel matrix generated
3885 
3886     Level: advanced
3887 
3888    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3889 
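   A rough usage sketch (inmat is a SeqAIJ matrix that each process has built locally; the names
   are hypothetical and error handling is elided). Note that the routine destroys inmat:

.vb
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
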
3890 @*/
3891 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3892 {
3893   PetscErrorCode ierr;
3894   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3895   PetscInt       *indx;
3896   PetscScalar    *values;
3897 
3898   PetscFunctionBegin;
3899   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3900   if (scall == MAT_INITIAL_MATRIX){
3901     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3902     if (n == PETSC_DECIDE){
3903       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3904     }
3905     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3906     rstart -= m;
3907 
3908     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3909     for (i=0;i<m;i++) {
3910       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3911       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3912       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3913     }
3914     /* This routine will ONLY return MPIAIJ type matrix */
3915     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3916     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3917     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3918     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3919     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3920 
3921   } else if (scall == MAT_REUSE_MATRIX){
3922     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3923   } else {
3924     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3925   }
3926 
3927   for (i=0;i<m;i++) {
3928     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3929     Ii    = i + rstart;
3930     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3931     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3932   }
3933   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3934   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936 
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatFileSplit"
3942 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3943 {
3944   PetscErrorCode    ierr;
3945   PetscMPIInt       rank;
3946   PetscInt          m,N,i,rstart,nnz;
3947   size_t            len;
3948   const PetscInt    *indx;
3949   PetscViewer       out;
3950   char              *name;
3951   Mat               B;
3952   const PetscScalar *values;
3953 
3954   PetscFunctionBegin;
3955   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3956   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3957   /* Should this be the type of the diagonal block of A? */
3958   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3959   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3960   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3961   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3962   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3963   for (i=0;i<m;i++) {
3964     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3965     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3966     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3967   }
3968   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3969   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3970 
3971   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3972   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3973   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3974   sprintf(name,"%s.%d",outfile,rank);
3975   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3976   ierr = PetscFree(name);CHKERRQ(ierr);
3977   ierr = MatView(B,out);CHKERRQ(ierr);
3978   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3979   ierr = MatDestroy(B);CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982 
3983 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3984 #undef __FUNCT__
3985 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3986 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3987 {
3988   PetscErrorCode       ierr;
3989   Mat_Merge_SeqsToMPI  *merge;
3990   PetscContainer       container;
3991 
3992   PetscFunctionBegin;
3993   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3994   if (container) {
3995     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4008     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4009 
4010     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4011     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4012   }
4013   ierr = PetscFree(merge);CHKERRQ(ierr);
4014 
4015   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4016   PetscFunctionReturn(0);
4017 }
4018 
4019 #include "../src/mat/utils/freespace.h"
4020 #include "petscbt.h"
4021 
4022 #undef __FUNCT__
4023 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4024 /*@C
4025       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4026                  matrices from each process
4027 
4028     Collective on MPI_Comm
4029 
4030    Input Parameters:
4031 +    comm - the communicator the parallel matrix will live on
4032 .    seqmat - the input sequential matrix on this process
4033 .    m - number of local rows (or PETSC_DECIDE)
4034 .    n - number of local columns (or PETSC_DECIDE)
4035 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4036 
4037    Output Parameter:
4038 .    mpimat - the parallel matrix generated
4039 
4040     Level: advanced
4041 
4042    Notes:
4043      The dimensions of the sequential matrix on each process MUST be the same.
4044      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4045      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4046 @*/
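/*
   A minimal usage sketch (an illustration only, not taken from the PETSc examples) of the
   driver MatMerge_SeqsToMPI() defined further below, which wraps the symbolic and numeric
   phases; it assumes every process already holds an assembled SeqAIJ matrix seqmat of
   identical global dimensions:

     Mat seqmat,mpimat;
     ...                                                  // each process assembles its own SeqAIJ seqmat
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ...                                                  // refill seqmat with new values, same nonzero pattern
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(mpimat);CHKERRQ(ierr);             // also frees the attached merge context
*/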
4047 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4048 {
4049   PetscErrorCode       ierr;
4050   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4051   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4052   PetscMPIInt          size,rank,taga,*len_s;
4053   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4054   PetscInt             proc,m;
4055   PetscInt             **buf_ri,**buf_rj;
4056   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4057   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4058   MPI_Request          *s_waits,*r_waits;
4059   MPI_Status           *status;
4060   MatScalar            *aa=a->a;
4061   MatScalar            **abuf_r,*ba_i;
4062   Mat_Merge_SeqsToMPI  *merge;
4063   PetscContainer       container;
4064 
4065   PetscFunctionBegin;
4066   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4067 
4068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4069   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4070 
4071   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4072   if (container) {
4073     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4074   }
4075   bi     = merge->bi;
4076   bj     = merge->bj;
4077   buf_ri = merge->buf_ri;
4078   buf_rj = merge->buf_rj;
4079 
4080   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4081   owners = merge->rowmap->range;
4082   len_s  = merge->len_s;
4083 
4084   /* send and recv matrix values */
4085   /*-----------------------------*/
4086   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4087   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4088 
4089   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4090   for (proc=0,k=0; proc<size; proc++){
4091     if (!len_s[proc]) continue;
4092     i = owners[proc];
4093     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4094     k++;
4095   }
4096 
4097   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4098   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4099   ierr = PetscFree(status);CHKERRQ(ierr);
4100 
4101   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4102   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4103 
4104   /* insert mat values of mpimat */
4105   /*----------------------------*/
4106   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4107   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4108 
4109   for (k=0; k<merge->nrecv; k++){
4110     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4111     nrows = *(buf_ri_k[k]);
4112     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4113     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (cumulative nonzero counts) of the k-th received message */
4114   }
4115 
4116   /* set values of ba */
4117   m = merge->rowmap->n;
4118   for (i=0; i<m; i++) {
4119     arow = owners[rank] + i;
4120     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4121     bnzi = bi[i+1] - bi[i];
4122     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4123 
4124     /* add local non-zero vals of this proc's seqmat into ba */
4125     anzi = ai[arow+1] - ai[arow];
4126     aj   = a->j + ai[arow];
4127     aa   = a->a + ai[arow];
4128     nextaj = 0;
4129     for (j=0; nextaj<anzi; j++){
4130       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4131         ba_i[j] += aa[nextaj++];
4132       }
4133     }
4134 
4135     /* add received vals into ba */
4136     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4137       /* i-th row */
4138       if (i == *nextrow[k]) {
4139         anzi = *(nextai[k]+1) - *nextai[k];
4140         aj   = buf_rj[k] + *(nextai[k]);
4141         aa   = abuf_r[k] + *(nextai[k]);
4142         nextaj = 0;
4143         for (j=0; nextaj<anzi; j++){
4144           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4145             ba_i[j] += aa[nextaj++];
4146           }
4147         }
4148         nextrow[k]++; nextai[k]++;
4149       }
4150     }
4151     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4152   }
4153   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4154   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155 
4156   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4157   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4158   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4159   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4160   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4166 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4167 {
4168   PetscErrorCode       ierr;
4169   Mat                  B_mpi;
4170   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4171   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4172   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4173   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4174   PetscInt             len,proc,*dnz,*onz;
4175   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4176   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4177   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4178   MPI_Status           *status;
4179   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4180   PetscBT              lnkbt;
4181   Mat_Merge_SeqsToMPI  *merge;
4182   PetscContainer       container;
4183 
4184   PetscFunctionBegin;
4185   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4186 
4187   /* make sure it is a PETSc comm */
4188   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4189   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4190   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4191 
4192   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4193   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4194 
4195   /* determine row ownership */
4196   /*---------------------------------------------------------*/
4197   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4198   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4199   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4200   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4201   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4202   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4203   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4204 
4205   m      = merge->rowmap->n;
4206   M      = merge->rowmap->N;
4207   owners = merge->rowmap->range;
4208 
4209   /* determine the number of messages to send, their lengths */
4210   /*---------------------------------------------------------*/
4211   len_s  = merge->len_s;
4212 
4213   len = 0;  /* length of buf_si[] */
4214   merge->nsend = 0;
4215   for (proc=0; proc<size; proc++){
4216     len_si[proc] = 0;
4217     if (proc == rank){
4218       len_s[proc] = 0;
4219     } else {
4220       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4221       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4222     }
4223     if (len_s[proc]) {
4224       merge->nsend++;
4225       nrows = 0;
4226       for (i=owners[proc]; i<owners[proc+1]; i++){
4227         if (ai[i+1] > ai[i]) nrows++;
4228       }
4229       len_si[proc] = 2*(nrows+1);
4230       len += len_si[proc];
4231     }
4232   }
4233 
4234   /* determine the number and length of messages to receive for ij-structure */
4235   /*-------------------------------------------------------------------------*/
4236   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4237   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4238 
4239   /* post the Irecv of j-structure */
4240   /*-------------------------------*/
4241   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4242   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4243 
4244   /* post the Isend of j-structure */
4245   /*--------------------------------*/
4246   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4247 
4248   for (proc=0, k=0; proc<size; proc++){
4249     if (!len_s[proc]) continue;
4250     i = owners[proc];
4251     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4252     k++;
4253   }
4254 
4255   /* receives and sends of j-structure are complete */
4256   /*------------------------------------------------*/
4257   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4258   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4259 
4260   /* send and recv i-structure */
4261   /*---------------------------*/
4262   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4263   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4264 
4265   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4266   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4267   for (proc=0,k=0; proc<size; proc++){
4268     if (!len_s[proc]) continue;
4269     /* form outgoing message for i-structure:
4270          buf_si[0]:                 nrows to be sent
4271                [1:nrows]:           row index (local to the destination process)
4272                [nrows+1:2*nrows+1]: i-structure index
4273     */
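    /* For example, a message covering two nonempty rows (destination-local
       indices 3 and 7, holding 4 and 6 nonzeros respectively) would look like
           buf_si = [2, 3, 7, 0, 4, 10]
       i.e. nrows, then the row indices, then the cumulative nonzero counts. */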
4274     /*-------------------------------------------*/
4275     nrows = len_si[proc]/2 - 1;
4276     buf_si_i    = buf_si + nrows+1;
4277     buf_si[0]   = nrows;
4278     buf_si_i[0] = 0;
4279     nrows = 0;
4280     for (i=owners[proc]; i<owners[proc+1]; i++){
4281       anzi = ai[i+1] - ai[i];
4282       if (anzi) {
4283         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4284         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4285         nrows++;
4286       }
4287     }
4288     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4289     k++;
4290     buf_si += len_si[proc];
4291   }
4292 
4293   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4294   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4295 
4296   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4297   for (i=0; i<merge->nrecv; i++){
4298     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4299   }
4300 
4301   ierr = PetscFree(len_si);CHKERRQ(ierr);
4302   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4303   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4304   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4305   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4306   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4307   ierr = PetscFree(status);CHKERRQ(ierr);
4308 
4309   /* compute a local seq matrix in each processor */
4310   /*----------------------------------------------*/
4311   /* allocate bi array and free space for accumulating nonzero column info */
4312   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4313   bi[0] = 0;
4314 
4315   /* create and initialize a linked list */
4316   nlnk = N+1;
4317   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4318 
4319   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4320   len = 0;
4321   len  = ai[owners[rank+1]] - ai[owners[rank]];
4322   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4323   current_space = free_space;
4324 
4325   /* determine symbolic info for each local row */
4326   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4327 
4328   for (k=0; k<merge->nrecv; k++){
4329     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4330     nrows = *buf_ri_k[k];
4331     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4332     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (cumulative nonzero counts) of the k-th received message */
4333   }
4334 
4335   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4336   len = 0;
4337   for (i=0;i<m;i++) {
4338     bnzi   = 0;
4339     /* add local non-zero cols of this proc's seqmat into lnk */
4340     arow   = owners[rank] + i;
4341     anzi   = ai[arow+1] - ai[arow];
4342     aj     = a->j + ai[arow];
4343     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4344     bnzi += nlnk;
4345     /* add received col data into lnk */
4346     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4347       if (i == *nextrow[k]) { /* i-th row */
4348         anzi = *(nextai[k]+1) - *nextai[k];
4349         aj   = buf_rj[k] + *nextai[k];
4350         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4351         bnzi += nlnk;
4352         nextrow[k]++; nextai[k]++;
4353       }
4354     }
4355     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4356 
4357     /* if free space is not available, make more free space */
4358     if (current_space->local_remaining<bnzi) {
4359       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4360       nspacedouble++;
4361     }
4362     /* copy data into free space, then initialize lnk */
4363     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4364     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4365 
4366     current_space->array           += bnzi;
4367     current_space->local_used      += bnzi;
4368     current_space->local_remaining -= bnzi;
4369 
4370     bi[i+1] = bi[i] + bnzi;
4371   }
4372 
4373   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4374 
4375   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4376   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4377   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4378 
4379   /* create symbolic parallel matrix B_mpi */
4380   /*---------------------------------------*/
4381   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4382   if (n==PETSC_DECIDE) {
4383     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4384   } else {
4385     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4386   }
4387   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4388   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4389   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4390 
4391   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4392   B_mpi->assembled     = PETSC_FALSE;
4393   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4394   merge->bi            = bi;
4395   merge->bj            = bj;
4396   merge->buf_ri        = buf_ri;
4397   merge->buf_rj        = buf_rj;
4398   merge->coi           = PETSC_NULL;
4399   merge->coj           = PETSC_NULL;
4400   merge->owners_co     = PETSC_NULL;
4401 
4402   /* attach the supporting struct to B_mpi for reuse */
4403   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4404   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4405   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4406   *mpimat = B_mpi;
4407 
4408   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4409   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #undef __FUNCT__
4414 #define __FUNCT__ "MatMerge_SeqsToMPI"
4415 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4416 {
4417   PetscErrorCode   ierr;
4418 
4419   PetscFunctionBegin;
4420   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4421   if (scall == MAT_INITIAL_MATRIX){
4422     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4423   }
4424   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4425   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 #undef __FUNCT__
4430 #define __FUNCT__ "MatGetLocalMat"
4431 /*@
4432      MatGetLocalMat - Creates a SeqAIJ matrix containing all of the local rows of A
4433 
4434     Not Collective
4435 
4436    Input Parameters:
4437 +    A - the matrix
4438 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4439 
4440    Output Parameter:
4441 .    A_loc - the local sequential matrix generated
4442 
4443     Level: developer
4444 
4445 @*/
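/*
   A hedged usage sketch (not part of the PETSc examples): assuming A is an assembled
   MPIAIJ matrix, its local rows could be extracted once and then refreshed after the
   numerical values of A change (same nonzero pattern):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...                                            // A changes numerically
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/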
4446 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4447 {
4448   PetscErrorCode  ierr;
4449   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4450   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4451   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4452   MatScalar       *aa=a->a,*ba=b->a,*cam;
4453   PetscScalar     *ca;
4454   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4455   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4456 
4457   PetscFunctionBegin;
4458   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4459   if (scall == MAT_INITIAL_MATRIX){
4460     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4461     ci[0] = 0;
4462     for (i=0; i<am; i++){
4463       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4464     }
4465     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4466     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4467     k = 0;
4468     for (i=0; i<am; i++) {
4469       ncols_o = bi[i+1] - bi[i];
4470       ncols_d = ai[i+1] - ai[i];
4471       /* off-diagonal portion of A */
4472       for (jo=0; jo<ncols_o; jo++) {
4473         col = cmap[*bj];
4474         if (col >= cstart) break;
4475         cj[k]   = col; bj++;
4476         ca[k++] = *ba++;
4477       }
4478       /* diagonal portion of A */
4479       for (j=0; j<ncols_d; j++) {
4480         cj[k]   = cstart + *aj++;
4481         ca[k++] = *aa++;
4482       }
4483       /* off-diagonal portion of A */
4484       for (j=jo; j<ncols_o; j++) {
4485         cj[k]   = cmap[*bj++];
4486         ca[k++] = *ba++;
4487       }
4488     }
4489     /* put together the new matrix */
4490     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4491     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4492     /* Since these are PETSc arrays, change flags to free them as necessary. */
4493     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4494     mat->free_a  = PETSC_TRUE;
4495     mat->free_ij = PETSC_TRUE;
4496     mat->nonew   = 0;
4497   } else if (scall == MAT_REUSE_MATRIX){
4498     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4499     ci = mat->i; cj = mat->j; cam = mat->a;
4500     for (i=0; i<am; i++) {
4501       /* off-diagonal portion of A */
4502       ncols_o = bi[i+1] - bi[i];
4503       for (jo=0; jo<ncols_o; jo++) {
4504         col = cmap[*bj];
4505         if (col >= cstart) break;
4506         *cam++ = *ba++; bj++;
4507       }
4508       /* diagonal portion of A */
4509       ncols_d = ai[i+1] - ai[i];
4510       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4511       /* off-diagonal portion of A */
4512       for (j=jo; j<ncols_o; j++) {
4513         *cam++ = *ba++; bj++;
4514       }
4515     }
4516   } else {
4517     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4518   }
4519 
4520   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4521   PetscFunctionReturn(0);
4522 }
4523 
4524 #undef __FUNCT__
4525 #define __FUNCT__ "MatGetLocalMatCondensed"
4526 /*@C
4527      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all of the local rows and the NON-ZERO columns of A
4528 
4529     Not Collective
4530 
4531    Input Parameters:
4532 +    A - the matrix
4533 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4534 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4535 
4536    Output Parameter:
4537 .    A_loc - the local sequential matrix generated
4538 
4539     Level: developer
4540 
4541 @*/
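/*
   A hedged usage sketch: when row and col are passed as PETSC_NULL the routine builds
   the index sets itself, from the local ownership range and the nonzero columns of A:

     Mat A_cond;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_cond);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A_cond);CHKERRQ(ierr);
*/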
4542 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4543 {
4544   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4545   PetscErrorCode    ierr;
4546   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4547   IS                isrowa,iscola;
4548   Mat               *aloc;
4549 
4550   PetscFunctionBegin;
4551   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4552   if (!row){
4553     start = A->rmap->rstart; end = A->rmap->rend;
4554     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4555   } else {
4556     isrowa = *row;
4557   }
4558   if (!col){
4559     start = A->cmap->rstart;
4560     cmap  = a->garray;
4561     nzA   = a->A->cmap->n;
4562     nzB   = a->B->cmap->n;
4563     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4564     ncols = 0;
4565     for (i=0; i<nzB; i++) {
4566       if (cmap[i] < start) idx[ncols++] = cmap[i];
4567       else break;
4568     }
4569     imark = i;
4570     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4571     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4572     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4573     ierr = PetscFree(idx);CHKERRQ(ierr);
4574   } else {
4575     iscola = *col;
4576   }
4577   if (scall != MAT_INITIAL_MATRIX){
4578     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4579     aloc[0] = *A_loc;
4580   }
4581   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4582   *A_loc = aloc[0];
4583   ierr = PetscFree(aloc);CHKERRQ(ierr);
4584   if (!row){
4585     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4586   }
4587   if (!col){
4588     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4589   }
4590   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4591   PetscFunctionReturn(0);
4592 }
4593 
4594 #undef __FUNCT__
4595 #define __FUNCT__ "MatGetBrowsOfAcols"
4596 /*@C
4597     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
4598 
4599     Collective on Mat
4600 
4601    Input Parameters:
4602 +    A,B - the matrices in mpiaij format
4603 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4604 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4605 
4606    Output Parameters:
4607 +    rowb, colb - index sets of rows and columns of B to extract
4608 .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
4609 -    B_seq - the sequential matrix generated
4610 
4611     Level: developer
4612 
4613 @*/
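/*
   A hedged usage sketch: isrowb/iscolb are created on the first call and handed back,
   together with B_seq, for a later MAT_REUSE_MATRIX call once B has new numerical
   values with the same nonzero pattern:

     IS       isrowb,iscolb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isrowb,&iscolb,&brstart,&B_seq);CHKERRQ(ierr);
     ...                                            // B changes numerically, same pattern
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&isrowb,&iscolb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/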
4614 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4615 {
4616   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4617   PetscErrorCode    ierr;
4618   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4619   IS                isrowb,iscolb;
4620   Mat               *bseq;
4621 
4622   PetscFunctionBegin;
4623   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4624     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4625   }
4626   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4627 
4628   if (scall == MAT_INITIAL_MATRIX){
4629     start = A->cmap->rstart;
4630     cmap  = a->garray;
4631     nzA   = a->A->cmap->n;
4632     nzB   = a->B->cmap->n;
4633     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4634     ncols = 0;
4635     for (i=0; i<nzB; i++) {  /* row < local row index */
4636       if (cmap[i] < start) idx[ncols++] = cmap[i];
4637       else break;
4638     }
4639     imark = i;
4640     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4641     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4642     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4643     ierr = PetscFree(idx);CHKERRQ(ierr);
4644     *brstart = imark;
4645     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4646   } else {
4647     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4648     isrowb = *rowb; iscolb = *colb;
4649     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4650     bseq[0] = *B_seq;
4651   }
4652   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4653   *B_seq = bseq[0];
4654   ierr = PetscFree(bseq);CHKERRQ(ierr);
4655   if (!rowb){
4656     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4657   } else {
4658     *rowb = isrowb;
4659   }
4660   if (!colb){
4661     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4662   } else {
4663     *colb = iscolb;
4664   }
4665   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4666   PetscFunctionReturn(0);
4667 }
4668 
4669 #undef __FUNCT__
4670 #define __FUNCT__ "MatGetBrowsOfAoCols"
4671 /*@C
4672     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4673     of the OFF-DIAGONAL portion of local A
4674 
4675     Collective on Mat
4676 
4677    Input Parameters:
4678 +    A,B - the matrices in mpiaij format
4679 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4680 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4681 .    startsj_r - similar to startsj for receives
4682 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4683 
4684    Output Parameter:
4685 .    B_oth - the sequential matrix generated
4686 
4687     Level: developer
4688 
4689 @*/
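/*
   A hedged usage sketch: the index buffers startsj/startsj_r and the value buffer bufa
   are filled on the first call and handed back unchanged for MAT_REUSE_MATRIX calls;
   passing PETSC_NULL for them forces MAT_INITIAL_MATRIX behaviour every time:

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...                                            // B changes numerically, same pattern
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/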
4690 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4691 {
4692   VecScatter_MPI_General *gen_to,*gen_from;
4693   PetscErrorCode         ierr;
4694   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4695   Mat_SeqAIJ             *b_oth;
4696   VecScatter             ctx=a->Mvctx;
4697   MPI_Comm               comm=((PetscObject)ctx)->comm;
4698   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4699   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4700   PetscScalar            *rvalues,*svalues;
4701   MatScalar              *b_otha,*bufa,*bufA;
4702   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4703   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4704   MPI_Status             *sstatus,rstatus;
4705   PetscMPIInt            jj;
4706   PetscInt               *cols,sbs,rbs;
4707   PetscScalar            *vals;
4708 
4709   PetscFunctionBegin;
4710   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4711     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4712   }
4713   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4714   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4715 
4716   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4717   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4718   rvalues  = gen_from->values; /* holds the length of receiving row */
4719   svalues  = gen_to->values;   /* holds the length of sending row */
4720   nrecvs   = gen_from->n;
4721   nsends   = gen_to->n;
4722 
4723   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4724   srow     = gen_to->indices;   /* local row index to be sent */
4725   sstarts  = gen_to->starts;
4726   sprocs   = gen_to->procs;
4727   sstatus  = gen_to->sstatus;
4728   sbs      = gen_to->bs;
4729   rstarts  = gen_from->starts;
4730   rprocs   = gen_from->procs;
4731   rbs      = gen_from->bs;
4732 
4733   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4734   if (scall == MAT_INITIAL_MATRIX){
4735     /* i-array */
4736     /*---------*/
4737     /*  post receives */
4738     for (i=0; i<nrecvs; i++){
4739       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4740       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4741       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4742     }
4743 
4744     /* pack the outgoing message */
4745     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4746     sstartsj[0] = 0;  rstartsj[0] = 0;
4747     len = 0; /* total length of j or a array to be sent */
4748     k = 0;
4749     for (i=0; i<nsends; i++){
4750       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4751       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4752       for (j=0; j<nrows; j++) {
4753         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4754         for (l=0; l<sbs; l++){
4755           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4756           rowlen[j*sbs+l] = ncols;
4757           len += ncols;
4758           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4759         }
4760         k++;
4761       }
4762       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4763       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4764     }
4765     /* recvs and sends of i-array are completed */
4766     i = nrecvs;
4767     while (i--) {
4768       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4769     }
4770     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4771 
4772     /* allocate buffers for sending j and a arrays */
4773     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4774     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4775 
4776     /* create i-array of B_oth */
4777     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4778     b_othi[0] = 0;
4779     len = 0; /* total length of j or a array to be received */
4780     k = 0;
4781     for (i=0; i<nrecvs; i++){
4782       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4783       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4784       for (j=0; j<nrows; j++) {
4785         b_othi[k+1] = b_othi[k] + rowlen[j];
4786         len += rowlen[j]; k++;
4787       }
4788       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4789     }
4790 
4791     /* allocate space for j and a arrays of B_oth */
4792     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4793     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4794 
4795     /* j-array */
4796     /*---------*/
4797     /*  post receives of j-array */
4798     for (i=0; i<nrecvs; i++){
4799       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4800       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4801     }
4802 
4803     /* pack the outgoing message j-array */
4804     k = 0;
4805     for (i=0; i<nsends; i++){
4806       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4807       bufJ = bufj+sstartsj[i];
4808       for (j=0; j<nrows; j++) {
4809         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4810         for (ll=0; ll<sbs; ll++){
4811           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4812           for (l=0; l<ncols; l++){
4813             *bufJ++ = cols[l];
4814           }
4815           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4816         }
4817       }
4818       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4819     }
4820 
4821     /* recvs and sends of j-array are completed */
4822     i = nrecvs;
4823     while (i--) {
4824       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4825     }
4826     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4827   } else if (scall == MAT_REUSE_MATRIX){
4828     sstartsj = *startsj;
4829     rstartsj = *startsj_r;
4830     bufa     = *bufa_ptr;
4831     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4832     b_otha   = b_oth->a;
4833   } else {
4834     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4835   }
4836 
4837   /* a-array */
4838   /*---------*/
4839   /*  post receives of a-array */
4840   for (i=0; i<nrecvs; i++){
4841     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4842     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4843   }
4844 
4845   /* pack the outgoing message a-array */
4846   k = 0;
4847   for (i=0; i<nsends; i++){
4848     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4849     bufA = bufa+sstartsj[i];
4850     for (j=0; j<nrows; j++) {
4851       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4852       for (ll=0; ll<sbs; ll++){
4853         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4854         for (l=0; l<ncols; l++){
4855           *bufA++ = vals[l];
4856         }
4857         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4858       }
4859     }
4860     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4861   }
4862   /* recvs and sends of a-array are completed */
4863   i = nrecvs;
4864   while (i--) {
4865     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4866   }
4867   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4868   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4869 
4870   if (scall == MAT_INITIAL_MATRIX){
4871     /* put together the new matrix */
4872     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4873 
4874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4875     /* Since these are PETSc arrays, change flags to free them as necessary. */
4876     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4877     b_oth->free_a  = PETSC_TRUE;
4878     b_oth->free_ij = PETSC_TRUE;
4879     b_oth->nonew   = 0;
4880 
4881     ierr = PetscFree(bufj);CHKERRQ(ierr);
4882     if (!startsj || !bufa_ptr){
4883       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4884       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4885     } else {
4886       *startsj   = sstartsj;
4887       *startsj_r = rstartsj;
4888       *bufa_ptr  = bufa;
4889     }
4890   }
4891   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4892   PetscFunctionReturn(0);
4893 }
4894 
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatGetCommunicationStructs"
4897 /*@C
4898   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4899 
4900   Not Collective
4901 
4902   Input Parameter:
4903 . A - The matrix in mpiaij format
4904 
4905   Output Parameters:
4906 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4907 . colmap - A map from global column index to local index into lvec
4908 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4909 
4910   Level: developer
4911 
4912 @*/
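/*
   A hedged usage sketch (the colmap type depends on the PETSC_USE_CTABLE configuration):

     Vec        lvec;
     VecScatter sct;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
*/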
4913 #if defined (PETSC_USE_CTABLE)
4914 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4915 #else
4916 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4917 #endif
4918 {
4919   Mat_MPIAIJ *a;
4920 
4921   PetscFunctionBegin;
4922   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4923   PetscValidPointer(lvec, 2);
4924   PetscValidPointer(colmap, 3);
4925   PetscValidPointer(multScatter, 4);
4926   a = (Mat_MPIAIJ *) A->data;
4927   if (lvec) *lvec = a->lvec;
4928   if (colmap) *colmap = a->colmap;
4929   if (multScatter) *multScatter = a->Mvctx;
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 EXTERN_C_BEGIN
4934 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4935 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4936 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4937 EXTERN_C_END
4938 
4939 #undef __FUNCT__
4940 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4941 /*
4942     Computes (B'*A')' since computing B*A directly is untenable
4943 
4944                n                       p                          p
4945         (              )       (              )         (                  )
4946       m (      A       )  *  n (       B      )   =   m (         C        )
4947         (              )       (              )         (                  )
4948 
4949 */
4950 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4951 {
4952   PetscErrorCode     ierr;
4953   Mat                At,Bt,Ct;
4954 
4955   PetscFunctionBegin;
4956   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4957   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4958   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4959   ierr = MatDestroy(At);CHKERRQ(ierr);
4960   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4961   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4962   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4963   PetscFunctionReturn(0);
4964 }
4965 
4966 #undef __FUNCT__
4967 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4968 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4969 {
4970   PetscErrorCode ierr;
4971   PetscInt       m=A->rmap->n,n=B->cmap->n;
4972   Mat            Cmat;
4973 
4974   PetscFunctionBegin;
4975   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4976   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4977   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4978   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4979   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4980   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4981   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4982   *C   = Cmat;
4983   PetscFunctionReturn(0);
4984 }
4985 
4986 /* ----------------------------------------------------------------*/
4987 #undef __FUNCT__
4988 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4989 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4990 {
4991   PetscErrorCode ierr;
4992 
4993   PetscFunctionBegin;
4994   if (scall == MAT_INITIAL_MATRIX){
4995     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4996   }
4997   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4998   PetscFunctionReturn(0);
4999 }
5000 
5001 EXTERN_C_BEGIN
5002 #if defined(PETSC_HAVE_MUMPS)
5003 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5004 #endif
5005 #if defined(PETSC_HAVE_PASTIX)
5006 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5007 #endif
5008 #if defined(PETSC_HAVE_SUPERLU_DIST)
5009 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5010 #endif
5011 #if defined(PETSC_HAVE_SPOOLES)
5012 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5013 #endif
5014 EXTERN_C_END
5015 
5016 /*MC
5017    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5018 
5019    Options Database Keys:
5020 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5021 
5022   Level: beginner
5023 
5024 .seealso: MatCreateMPIAIJ()
5025 M*/
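/*
   A hedged example of selecting this matrix type programmatically (comm, m and n are
   placeholders, and the preallocation counts 5/2 are illustrative only):

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);    // or -mat_type mpiaij via MatSetFromOptions()
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/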
5026 
5027 EXTERN_C_BEGIN
5028 #undef __FUNCT__
5029 #define __FUNCT__ "MatCreate_MPIAIJ"
5030 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5031 {
5032   Mat_MPIAIJ     *b;
5033   PetscErrorCode ierr;
5034   PetscMPIInt    size;
5035 
5036   PetscFunctionBegin;
5037   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5038 
5039   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5040   B->data         = (void*)b;
5041   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5042   B->rmap->bs     = 1;
5043   B->assembled    = PETSC_FALSE;
5044   B->mapping      = 0;
5045 
5046   B->insertmode   = NOT_SET_VALUES;
5047   b->size         = size;
5048   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5049 
5050   /* build cache for off array entries formed */
5051   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5052   b->donotstash  = PETSC_FALSE;
5053   b->colmap      = 0;
5054   b->garray      = 0;
5055   b->roworiented = PETSC_TRUE;
5056 
5057   /* stuff used for matrix vector multiply */
5058   b->lvec      = PETSC_NULL;
5059   b->Mvctx     = PETSC_NULL;
5060 
5061   /* stuff for MatGetRow() */
5062   b->rowindices   = 0;
5063   b->rowvalues    = 0;
5064   b->getrowactive = PETSC_FALSE;
5065 
5066 #if defined(PETSC_HAVE_SPOOLES)
5067   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5068                                      "MatGetFactor_mpiaij_spooles",
5069                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5070 #endif
5071 #if defined(PETSC_HAVE_MUMPS)
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5073                                      "MatGetFactor_aij_mumps",
5074                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5075 #endif
5076 #if defined(PETSC_HAVE_PASTIX)
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5078 					   "MatGetFactor_mpiaij_pastix",
5079 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5080 #endif
5081 #if defined(PETSC_HAVE_SUPERLU_DIST)
5082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5083                                      "MatGetFactor_mpiaij_superlu_dist",
5084                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5085 #endif
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5087                                      "MatStoreValues_MPIAIJ",
5088                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5090                                      "MatRetrieveValues_MPIAIJ",
5091                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5093 				     "MatGetDiagonalBlock_MPIAIJ",
5094                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5096 				     "MatIsTranspose_MPIAIJ",
5097 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5099 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5100 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5102 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5103 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5105 				     "MatDiagonalScaleLocal_MPIAIJ",
5106 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5108                                      "MatConvert_MPIAIJ_MPICSRPERM",
5109                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5111                                      "MatConvert_MPIAIJ_MPICRL",
5112                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5114                                      "MatConvert_MPIAIJ_MPISBAIJ",
5115                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5117                                      "MatMatMult_MPIDense_MPIAIJ",
5118                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5120                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5121                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5122   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5123                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5124                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5125   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5126   PetscFunctionReturn(0);
5127 }
5128 EXTERN_C_END
5129 
5130 #undef __FUNCT__
5131 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5132 /*@
5133      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5134          and "off-diagonal" part of the matrix in CSR format.
5135 
5136    Collective on MPI_Comm
5137 
5138    Input Parameters:
5139 +  comm - MPI communicator
5140 .  m - number of local rows (Cannot be PETSC_DECIDE)
5141 .  n - This value should be the same as the local size used in creating the
5142        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5143        it calculated if N is given). For square matrices n is almost always m.
5144 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5145 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5146 .   i - row indices for "diagonal" portion of matrix
5147 .   j - column indices
5148 .   a - matrix values
5149 .   oi - row indices for "off-diagonal" portion of matrix
5150 .   oj - column indices
5151 -   oa - matrix values
5152 
5153    Output Parameter:
5154 .   mat - the matrix
5155 
5156    Level: advanced
5157 
5158    Notes:
5159        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5160 
5161        The i and j indices are 0 based
5162 
5163        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5164 
5165        This sets local rows and cannot be used to set off-processor values.
5166 
5167        You cannot later use MatSetValues() to change values in this matrix.
5168 
5169 .keywords: matrix, aij, compressed row, sparse, parallel
5170 
5171 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5172           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5173 @*/
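/*
   A hedged calling sketch (array contents omitted; d_i/d_j/d_a hold the "diagonal"
   block in CSR form and o_i/o_j/o_a the "off-diagonal" block, all names being
   placeholders supplied by the caller):

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           d_i,d_j,d_a,o_i,o_j,o_a,&A);CHKERRQ(ierr);
     ...                                   // use A; the six arrays are not copied and must stay valid
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/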
5174 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5175 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5176 {
5177   PetscErrorCode ierr;
5178   Mat_MPIAIJ     *maij;
5179 
5180   PetscFunctionBegin;
5181   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5182   if (i[0]) {
5183     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5184   }
5185   if (oi[0]) {
5186     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5187   }
5188   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5189   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5190   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5191   maij = (Mat_MPIAIJ*) (*mat)->data;
5192   maij->donotstash     = PETSC_TRUE;
5193   (*mat)->preallocated = PETSC_TRUE;
5194 
5195   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5196   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5197   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5198   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5199 
5200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5201   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5202 
5203   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5206   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5207 
5208   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5209   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5210   PetscFunctionReturn(0);
5211 }
5212 
5213 /*
5214     Special version for direct calls from Fortran
5215 */
5216 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5217 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5218 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5219 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5220 #endif
5221 
5222 /* Change these macros so they can be used in a void function */
5223 #undef CHKERRQ
5224 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5225 #undef SETERRQ2
5226 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5227 #undef SETERRQ
5228 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5229 
5230 EXTERN_C_BEGIN
5231 #undef __FUNCT__
5232 #define __FUNCT__ "matsetvaluesmpiaij_"
5233 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5234 {
5235   Mat             mat = *mmat;
5236   PetscInt        m = *mm, n = *mn;
5237   InsertMode      addv = *maddv;
5238   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5239   PetscScalar     value;
5240   PetscErrorCode  ierr;
5241 
5242   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5243   if (mat->insertmode == NOT_SET_VALUES) {
5244     mat->insertmode = addv;
5245   }
5246 #if defined(PETSC_USE_DEBUG)
5247   else if (mat->insertmode != addv) {
5248     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5249   }
5250 #endif
5251   {
5252   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5253   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5254   PetscTruth      roworiented = aij->roworiented;
5255 
5256   /* Some Variables required in the macro */
5257   Mat             A = aij->A;
5258   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5259   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5260   MatScalar       *aa = a->a;
5261   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5262   Mat             B = aij->B;
5263   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5264   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5265   MatScalar       *ba = b->a;
5266 
5267   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5268   PetscInt        nonew = a->nonew;
5269   MatScalar       *ap1,*ap2;
5270 
5271   PetscFunctionBegin;
5272   for (i=0; i<m; i++) {
5273     if (im[i] < 0) continue;
5274 #if defined(PETSC_USE_DEBUG)
5275     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5276 #endif
5277     if (im[i] >= rstart && im[i] < rend) {
5278       row      = im[i] - rstart;
5279       lastcol1 = -1;
5280       rp1      = aj + ai[row];
5281       ap1      = aa + ai[row];
5282       rmax1    = aimax[row];
5283       nrow1    = ailen[row];
5284       low1     = 0;
5285       high1    = nrow1;
5286       lastcol2 = -1;
5287       rp2      = bj + bi[row];
5288       ap2      = ba + bi[row];
5289       rmax2    = bimax[row];
5290       nrow2    = bilen[row];
5291       low2     = 0;
5292       high2    = nrow2;
5293 
5294       for (j=0; j<n; j++) {
5295         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5296         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5297         if (in[j] >= cstart && in[j] < cend){
5298           col = in[j] - cstart;
5299           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5300         } else if (in[j] < 0) continue;
5301 #if defined(PETSC_USE_DEBUG)
5302         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5303 #endif
5304         else {
5305           if (mat->was_assembled) {
5306             if (!aij->colmap) {
5307               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5308             }
5309 #if defined (PETSC_USE_CTABLE)
5310             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5311 	    col--;
5312 #else
5313             col = aij->colmap[in[j]] - 1;
5314 #endif
5315             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5316               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5317               col =  in[j];
5318               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5319               B = aij->B;
5320               b = (Mat_SeqAIJ*)B->data;
5321               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5322               rp2      = bj + bi[row];
5323               ap2      = ba + bi[row];
5324               rmax2    = bimax[row];
5325               nrow2    = bilen[row];
5326               low2     = 0;
5327               high2    = nrow2;
5328               bm       = aij->B->rmap->n;
5329               ba = b->a;
5330             }
5331           } else col = in[j];
5332           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5333         }
5334       }
5335     } else {
5336       if (!aij->donotstash) {
5337         if (roworiented) {
5338           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5339         } else {
5340           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5341         }
5342       }
5343     }
5344   }}
5345   PetscFunctionReturnVoid();
5346 }
5347 EXTERN_C_END
5348 
5349