xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 12fe8d350da2a1fb5ce0234d2abcef2a979c3670)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
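/*
    Example usage (an illustrative sketch only, not PETSc documentation): gmat is
    assumed to be a MATSEQAIJ matrix whose entries are significant on process 0,
    and m is this process's local row count (the m's must sum to gmat's global size).

       Mat A;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
       ... change the numerical values of gmat on process 0 ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/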
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal and off-diagonal nonzero counts for each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal and off-diagonal nonzero counts for each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
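    /*
       ld[i], computed in the MAT_INITIAL_MATRIX branch above, is the number of
       off-diagonal (B) entries in local row i whose global column lies to the
       left of the diagonal block; it determines how each incoming row is split
       between B (left part), A, and B (right part) below.
    */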
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each process
181 holds an order-N integer array) but access is fast.
182 */
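/*
    Worked example (illustrative only): if aij->garray = {4,9,17} then the lookup
    used in MatSetValues_MPIAIJ() behaves as

       col = aij->colmap[9] - 1;                       ->  col == 1   (no CTABLE)
       PetscTableFind(aij->colmap,9+1,&col); col--;    ->  col == 1   (with CTABLE)

    and col == -1 for any global column that does not appear in the off-diagonal part.
*/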
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
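/*
   The two macros below insert one value into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search followed by a linear
   scan locates the column, the value is added to or overwrites an existing
   entry, and otherwise the row is reallocated (if permitted) and later entries
   are shifted up to make room. They rely on the local variables (rp1/rp2,
   ap1/ap2, nrow1/nrow2, low/high bounds, etc.) being set up as in
   MatSetValues_MPIAIJ() below.
*/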
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
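  /*
     v[] is assumed to hold the entire row ordered by increasing global column:
     first the off-diagonal (B) entries left of the diagonal block, then the
     diagonal-block (A) entries, then the off-diagonal entries to the right.
     For example, with ownership range [4,8) and row columns {1,3,4,6,9},
     v = {a(row,1), a(row,3), a(row,4), a(row,6), a(row,9)}.
  */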
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some Variables required in the macro */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
468   mat->insertmode = addv; /* in case this processor had no cache */
469 
470   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
471   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
472   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
478 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
482   PetscErrorCode ierr;
483   PetscMPIInt    n;
484   PetscInt       i,j,rstart,ncols,flg;
485   PetscInt       *row,*col;
486   PetscTruth     other_disassembled;
487   PetscScalar    *val;
488   InsertMode     addv = mat->insertmode;
489 
490   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
491   PetscFunctionBegin;
492   if (!aij->donotstash) {
493     while (1) {
494       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
495       if (!flg) break;
496 
497       for (i=0; i<n;) {
498         /* Now identify the consecutive vals belonging to the same row */
499         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
500         if (j < n) ncols = j-i;
501         else       ncols = n-i;
502         /* Now assemble all these values with a single function call */
503         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
504         i = j;
505       }
506     }
507     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
508   }
509   a->compressedrow.use     = PETSC_FALSE;
510   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
511   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
512 
513   /* determine if any process has disassembled; if so we must
514      also disassemble ourselves, in order that we may reassemble. */
515   /*
516      if the nonzero structure of submatrix B cannot change then we know that
517      no process disassembled and thus we can skip this step
518   */
519   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
520     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
521     if (mat->was_assembled && !other_disassembled) {
522       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523     }
524   }
525   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
526     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
527   }
528   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
529   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
530   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
531   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
532 
533   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
534   aij->rowvalues = 0;
535 
536   /* used by MatAXPY() */
537   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
538   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
539 
540   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
541   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
548 {
549   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
554   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatZeroRows_MPIAIJ"
560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
561 {
562   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
563   PetscErrorCode ierr;
564   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
565   PetscInt       i,*owners = A->rmap->range;
566   PetscInt       *nprocs,j,idx,nsends,row;
567   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
568   PetscInt       *rvalues,count,base,slen,*source;
569   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
570   MPI_Comm       comm = ((PetscObject)A)->comm;
571   MPI_Request    *send_waits,*recv_waits;
572   MPI_Status     recv_status,*send_status;
573 #if defined(PETSC_DEBUG)
574   PetscTruth     found = PETSC_FALSE;
575 #endif
576 
577   PetscFunctionBegin;
578   /*  first count the number of rows to be sent to each process */
579   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
580   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
581   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
582   j = 0;
583   for (i=0; i<N; i++) {
584     if (lastidx > (idx = rows[i])) j = 0;
585     lastidx = idx;
586     for (; j<size; j++) {
587       if (idx >= owners[j] && idx < owners[j+1]) {
588         nprocs[2*j]++;
589         nprocs[2*j+1] = 1;
590         owner[i] = j;
591 #if defined(PETSC_DEBUG)
592         found = PETSC_TRUE;
593 #endif
594         break;
595       }
596     }
597 #if defined(PETSC_DEBUG)
598     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
599     found = PETSC_FALSE;
600 #endif
601   }
602   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
603 
604   /* inform other processes of the number of messages and the maximum length */
605   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
606 
607   /* post receives:   */
608   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
609   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
610   for (i=0; i<nrecvs; i++) {
611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
612   }
613 
614   /* do sends:
615       1) starts[i] gives the starting index in svalues for the rows destined
616          for the ith process
617   */
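  /*
      Illustrative example: with size = 3, ownership ranges [0,4), [4,8), [8,12)
      and rows[] = {5, 1, 9, 6}, we get owner[] = {1,0,2,1} and per-destination
      counts {1,2,1}, hence starts[] = {0,1,3} and the packed svalues[] = {1,5,6,9},
      with one MPI_Isend() per destination that has a nonzero count.
  */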
618   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
619   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
620   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
621   starts[0] = 0;
622   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
623   for (i=0; i<N; i++) {
624     svalues[starts[owner[i]]++] = rows[i];
625   }
626 
627   starts[0] = 0;
628   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
629   count = 0;
630   for (i=0; i<size; i++) {
631     if (nprocs[2*i+1]) {
632       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
633     }
634   }
635   ierr = PetscFree(starts);CHKERRQ(ierr);
636 
637   base = owners[rank];
638 
639   /*  wait on receives */
640   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
641   count  = nrecvs; slen = 0;
642   while (count) {
643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
644     /* unpack receives into our local space */
645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
646     source[imdex]  = recv_status.MPI_SOURCE;
647     lens[imdex]    = n;
648     slen          += n;
649     count--;
650   }
651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
652 
653   /* move the data into the send scatter */
654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
655   count = 0;
656   for (i=0; i<nrecvs; i++) {
657     values = rvalues + i*nmax;
658     for (j=0; j<lens[i]; j++) {
659       lrows[count++] = values[j] - base;
660     }
661   }
662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
663   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
664   ierr = PetscFree(owner);CHKERRQ(ierr);
665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
666 
667   /* actually zap the local rows */
668   /*
669         Zero the required rows. If the "diagonal block" of the matrix
670      is square and the user wishes to set the diagonal, we use separate
671      code so that MatSetValues() is not called for each diagonal entry,
672      which would allocate new memory and slow things down with many mallocs.
673 
674   */
675   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
676   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
677   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
678     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
679   } else if (diag != 0.0) {
680     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
681     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
682       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
684     }
685     for (i = 0; i < slen; i++) {
686       row  = lrows[i] + rstart;
687       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
688     }
689     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
690     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691   } else {
692     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
693   }
694   ierr = PetscFree(lrows);CHKERRQ(ierr);
695 
696   /* wait on sends */
697   if (nsends) {
698     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
699     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
700     ierr = PetscFree(send_status);CHKERRQ(ierr);
701   }
702   ierr = PetscFree(send_waits);CHKERRQ(ierr);
703   ierr = PetscFree(svalues);CHKERRQ(ierr);
704 
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatMult_MPIAIJ"
710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
711 {
712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
713   PetscErrorCode ierr;
714   PetscInt       nt;
715 
716   PetscFunctionBegin;
717   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
718   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
719   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
721   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
729 {
730   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732 
733   PetscFunctionBegin;
734   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatMultAdd_MPIAIJ"
740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
747   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
748   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
756 {
757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
758   PetscErrorCode ierr;
759   PetscTruth     merged;
760 
761   PetscFunctionBegin;
762   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
763   /* do nondiagonal part */
764   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
765   if (!merged) {
766     /* send it on its way */
767     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
768     /* do local part */
769     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
770     /* receive remote parts: note this assumes the values are not actually */
771     /* added into yy until the VecScatterEnd() on the next line */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   } else {
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* send it on its way */
777     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778     /* values actually were received in the Begin() but we need to call this nop */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 EXTERN_C_BEGIN
785 #undef __FUNCT__
786 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
788 {
789   MPI_Comm       comm;
790   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
791   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
792   IS             Me,Notme;
793   PetscErrorCode ierr;
794   PetscInt       M,N,first,last,*notme,i;
795   PetscMPIInt    size;
796 
797   PetscFunctionBegin;
798 
799   /* Easy test: symmetric diagonal block */
800   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
801   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
802   if (!*f) PetscFunctionReturn(0);
803   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
805   if (size == 1) PetscFunctionReturn(0);
806 
807   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
808   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
809   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
810   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
811   for (i=0; i<first; i++) notme[i] = i;
812   for (i=last; i<M; i++) notme[i-last+first] = i;
813   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
814   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
815   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
816   Aoff = Aoffs[0];
817   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
818   Boff = Boffs[0];
819   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
820   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
821   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
822   ierr = ISDestroy(Me);CHKERRQ(ierr);
823   ierr = ISDestroy(Notme);CHKERRQ(ierr);
824 
825   PetscFunctionReturn(0);
826 }
827 EXTERN_C_END
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
832 {
833   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   /* do nondiagonal part */
838   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
839   /* send it on its way */
840   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
841   /* do local part */
842   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
843   /* receive remote parts */
844   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849   This only works correctly for square matrices where the subblock A->A is the
850    diagonal block
851 */
852 #undef __FUNCT__
853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
855 {
856   PetscErrorCode ierr;
857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
858 
859   PetscFunctionBegin;
860   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
861   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
862   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatScale_MPIAIJ"
868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
869 {
870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
875   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatDestroy_MPIAIJ"
881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
882 {
883   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887 #if defined(PETSC_USE_LOG)
888   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
889 #endif
890   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
891   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
892   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
893   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
894 #if defined (PETSC_USE_CTABLE)
895   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
896 #else
897   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
898 #endif
899   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
900   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
901   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
902   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
903   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
904   ierr = PetscFree(aij);CHKERRQ(ierr);
905 
906   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
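/*
    A summary of the binary format produced below (derived from the code; see the
    PETSc binary viewer documentation for the authoritative description): a four
    entry header (MAT_FILE_CLASSID, M, N, total number of nonzeros), followed by
    the row lengths of every row, then the global column indices of each row in
    ascending order, then the numerical values in the same order. All data is
    funneled to process 0, which performs the writes.
*/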
918 #undef __FUNCT__
919 #define __FUNCT__ "MatView_MPIAIJ_Binary"
920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
921 {
922   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
923   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
924   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
925   PetscErrorCode    ierr;
926   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
927   int               fd;
928   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
929   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
930   PetscScalar       *column_values;
931 
932   PetscFunctionBegin;
933   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
934   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
935   nz   = A->nz + B->nz;
936   if (!rank) {
937     header[0] = MAT_FILE_CLASSID;
938     header[1] = mat->rmap->N;
939     header[2] = mat->cmap->N;
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
941     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
942     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     /* get largest number of rows any processor has */
944     rlen = mat->rmap->n;
945     range = mat->rmap->range;
946     for (i=1; i<size; i++) {
947       rlen = PetscMax(rlen,range[i+1] - range[i]);
948     }
949   } else {
950     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     rlen = mat->rmap->n;
952   }
953 
954   /* load up the local row counts */
955   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
956   for (i=0; i<mat->rmap->n; i++) {
957     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
958   }
959 
960   /* store the row lengths to the file */
961   if (!rank) {
962     MPI_Status status;
963     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
964     for (i=1; i<size; i++) {
965       rlen = range[i+1] - range[i];
966       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
967       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     }
969   } else {
970     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
971   }
972   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
973 
974   /* load up the local column indices */
975   nzmax = nz; /* process 0 needs as much space as the largest process needs */
976   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
978   cnt  = 0;
979   for (i=0; i<mat->rmap->n; i++) {
980     for (j=B->i[i]; j<B->i[i+1]; j++) {
981       if ( (col = garray[B->j[j]]) > cstart) break;
982       column_indices[cnt++] = col;
983     }
984     for (k=A->i[i]; k<A->i[i+1]; k++) {
985       column_indices[cnt++] = A->j[k] + cstart;
986     }
987     for (; j<B->i[i+1]; j++) {
988       column_indices[cnt++] = garray[B->j[j]];
989     }
990   }
991   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
992 
993   /* store the column indices to the file */
994   if (!rank) {
995     MPI_Status status;
996     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     for (i=1; i<size; i++) {
998       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
999       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1000       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1001       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1002     }
1003   } else {
1004     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1005     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1008 
1009   /* load up the local column values */
1010   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1011   cnt  = 0;
1012   for (i=0; i<mat->rmap->n; i++) {
1013     for (j=B->i[i]; j<B->i[i+1]; j++) {
1014       if ( garray[B->j[j]] > cstart) break;
1015       column_values[cnt++] = B->a[j];
1016     }
1017     for (k=A->i[i]; k<A->i[i+1]; k++) {
1018       column_values[cnt++] = A->a[k];
1019     }
1020     for (; j<B->i[i+1]; j++) {
1021       column_values[cnt++] = B->a[j];
1022     }
1023   }
1024   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1025 
1026   /* store the column values to the file */
1027   if (!rank) {
1028     MPI_Status status;
1029     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1030     for (i=1; i<size; i++) {
1031       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1032       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1033       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1034       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1035     }
1036   } else {
1037     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1038     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1039   }
1040   ierr = PetscFree(column_values);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1047 {
1048   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1049   PetscErrorCode    ierr;
1050   PetscMPIInt       rank = aij->rank,size = aij->size;
1051   PetscTruth        isdraw,iascii,isbinary;
1052   PetscViewer       sviewer;
1053   PetscViewerFormat format;
1054 
1055   PetscFunctionBegin;
1056   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1059   if (iascii) {
1060     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1061     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1062       MatInfo    info;
1063       PetscTruth inodes;
1064 
1065       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1066       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1067       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1068       if (!inodes) {
1069         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1070 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1071       } else {
1072         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1073 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1074       }
1075       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1076       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1077       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1078       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1079       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1081       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1082       PetscFunctionReturn(0);
1083     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1084       PetscInt   inodecount,inodelimit,*inodes;
1085       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1086       if (inodes) {
1087         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1088       } else {
1089         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1090       }
1091       PetscFunctionReturn(0);
1092     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093       PetscFunctionReturn(0);
1094     }
1095   } else if (isbinary) {
1096     if (size == 1) {
1097       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1098       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1099     } else {
1100       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1101     }
1102     PetscFunctionReturn(0);
1103   } else if (isdraw) {
1104     PetscDraw  draw;
1105     PetscTruth isnull;
1106     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1107     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1108   }
1109 
1110   if (size == 1) {
1111     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1112     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1113   } else {
1114     /* assemble the entire matrix onto first processor. */
1115     Mat         A;
1116     Mat_SeqAIJ  *Aloc;
1117     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1118     MatScalar   *a;
1119 
1120     if (mat->rmap->N > 1024) {
1121       PetscTruth flg = PETSC_FALSE;
1122 
1123       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1124       if (!flg) {
1125         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1126       }
1127     }
1128 
1129     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1130     if (!rank) {
1131       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1132     } else {
1133       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1134     }
1135     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1136     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1137     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1138     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1139 
1140     /* copy over the A part */
1141     Aloc = (Mat_SeqAIJ*)aij->A->data;
1142     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143     row = mat->rmap->rstart;
1144     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1148     }
1149     aj = Aloc->j;
1150     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1151 
1152     /* copy over the B part */
1153     Aloc = (Mat_SeqAIJ*)aij->B->data;
1154     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1155     row  = mat->rmap->rstart;
1156     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1157     ct   = cols;
1158     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1159     for (i=0; i<m; i++) {
1160       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1161       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1162     }
1163     ierr = PetscFree(ct);CHKERRQ(ierr);
1164     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1165     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1166     /*
1167        Everyone has to call to draw the matrix since the graphics waits are
1168        synchronized across all processors that share the PetscDraw object
1169     */
1170     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171     if (!rank) {
1172       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1173       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1176     ierr = MatDestroy(A);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatView_MPIAIJ"
1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1184 {
1185   PetscErrorCode ierr;
1186   PetscTruth     iascii,isdraw,issocket,isbinary;
1187 
1188   PetscFunctionBegin;
1189   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1190   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1193   if (iascii || isdraw || isbinary || issocket) {
1194     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1195   } else {
1196     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
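/*
    An illustrative summary of the SOR_LOCAL_* branches below: each iteration
    scatters the current solution into mat->lvec, forms the locally corrected
    right hand side bb1 = bb - B*lvec, and then performs a sequential SOR sweep
    on the local diagonal block A with bb1 as the right hand side.
*/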
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
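/*
   MatSOR_MPIAIJ() performs only processor-local sweeps; the off-process coupling
   enters through the rhs update bb1 = bb - B*x computed above.  A minimal usage
   sketch, assuming an assembled MPIAIJ matrix A and conforming vectors b,x
   (ksp and pc are local names introduced for the example; PCSOR ends up calling
   MatSOR(), and options such as -pc_sor_local_symmetric select the sweep type):

     KSP ksp;  PC pc;
     ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/
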
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
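/*
   MatPermute_MPIAIJ() builds the permuted matrix by inverting the global row
   permutation and extracting a submatrix.  A single-process usage sketch,
   assuming perm[] is an application-supplied permutation of 0..n-1 (rowp, colp
   and Aperm are names introduced for the example):

     IS  rowp,colp;
     Mat Aperm;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,perm,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,perm,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(rowp);CHKERRQ(ierr);
     ierr = ISDestroy(colp);CHKERRQ(ierr);
*/
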
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
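/*
   The local counts of the diagonal (A) and off-diagonal (B) blocks are summed
   and, for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM, reduced over the communicator.
   A small sketch of querying these statistics from application code (the MatInfo
   fields used below are doubles, hence the %g format):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g allocated %g mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
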
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SPD:
1442     A->spd_set                         = PETSC_TRUE;
1443     A->spd                             = flg;
1444     if (flg) {
1445       A->symmetric                     = PETSC_TRUE;
1446       A->structurally_symmetric        = PETSC_TRUE;
1447       A->symmetric_set                 = PETSC_TRUE;
1448       A->structurally_symmetric_set    = PETSC_TRUE;
1449     }
1450     break;
1451   case MAT_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_STRUCTURALLY_SYMMETRIC:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_HERMITIAN:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   case MAT_SYMMETRY_ETERNAL:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   default:
1464     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
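/*
   Most options above are simply forwarded to the sequential blocks a->A and a->B.
   Illustrative calls from application code (any assembled MPIAIJ matrix A):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/
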
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatGetRow_MPIAIJ"
1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1477   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p;
1479 
1480   PetscFunctionBegin;
1481   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1482   mat->getrowactive = PETSC_TRUE;
1483 
1484   if (!mat->rowvalues && (idx || v)) {
1485     /*
1486         allocate enough space to hold information from the longest row.
1487     */
1488     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1489     PetscInt   max = 1,tmp;
1490     for (i=0; i<matin->rmap->n; i++) {
1491       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1492       if (max < tmp) { max = tmp; }
1493     }
1494     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1495   }
1496 
1497   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1498   lrow = row - rstart;
1499 
1500   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1501   if (!v)   {pvA = 0; pvB = 0;}
1502   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1503   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1504   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1505   nztot = nzA + nzB;
1506 
1507   cmap  = mat->garray;
1508   if (v  || idx) {
1509     if (nztot) {
1510       /* Sort by increasing column numbers, assuming A and B already sorted */
1511       PetscInt imark = -1;
1512       if (v) {
1513         *v = v_p = mat->rowvalues;
1514         for (i=0; i<nzB; i++) {
1515           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1516           else break;
1517         }
1518         imark = i;
1519         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1520         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1521       }
1522       if (idx) {
1523         *idx = idx_p = mat->rowindices;
1524         if (imark > -1) {
1525           for (i=0; i<imark; i++) {
1526             idx_p[i] = cmap[cworkB[i]];
1527           }
1528         } else {
1529           for (i=0; i<nzB; i++) {
1530             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1531             else break;
1532           }
1533           imark = i;
1534         }
1535         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1536         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1537       }
1538     } else {
1539       if (idx) *idx = 0;
1540       if (v)   *v   = 0;
1541     }
1542   }
1543   *nz = nztot;
1544   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1545   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1554 
1555   PetscFunctionBegin;
1556   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
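/*
   MatGetRow_MPIAIJ() merges one local row of the diagonal and off-diagonal blocks
   and returns global column indices in increasing order; only rows owned by the
   calling process may be requested.  A minimal sketch, assuming an assembled
   MPIAIJ matrix A:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
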
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
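/*
   In parallel only NORM_1, NORM_FROBENIUS and NORM_INFINITY are supported, as the
   code above shows.  Illustrative call:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/
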
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
1683   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
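/*
   The transpose is preallocated from the d_nnz counts computed above and filled
   with MatSetValues() using global column indices.  Illustrative out-of-place
   usage (At is a name introduced for the example); in-place transposition with
   MAT_REUSE_MATRIX and the same matrix is only allowed for square matrices:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/
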
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
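/*
   The scatter of the right-scaling vector rr into aij->lvec is started before the
   diagonal block is scaled and completed afterwards, overlapping communication
   with computation.  Illustrative call computing A = diag(l) A diag(r), where l
   and r are application-supplied vectors conforming to the row and column layout:

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/
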
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1760 {
1761   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIAIJ"
1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1772 {
1773   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1774   Mat            a,b,c,d;
1775   PetscTruth     flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatCopy_MPIAIJ"
1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1793 {
1794   PetscErrorCode ierr;
1795   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1796   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     /* Because of the column compression in the off-processor part of the matrix a->B,
1802        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1803        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1804        could be provided by first uncompressing the a->B matrices and then copying the
1805        submatrices */
1806     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1809     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
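/*
   The fast path copies the diagonal and off-diagonal blocks separately and is
   only taken when both matrices have the same nonzero pattern and the same copy
   routine.  A minimal sketch that satisfies these conditions (B is a name
   introduced for the example):

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
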
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
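/*
   MatSetUpPreallocation_MPIAIJ() supplies default preallocation when the user has
   not called MatMPIAIJSetPreallocation() explicitly.  For performance one should
   preallocate from known row counts; a sketch assuming mlocal local rows and
   application-supplied arrays d_nnz[] and o_nnz[] of per-row counts:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/
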
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
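/*
   For SAME_NONZERO_PATTERN the update Y = a*X + Y reduces to BLAS axpy calls on
   the stored values of the two blocks; for SUBSET_NONZERO_PATTERN a cached
   x-to-y index map is built once and reused.  Illustrative call:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
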
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv others' nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected row pointer (mat->i) entries from this source */
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from others */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
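/*
   A usage sketch of the interface routine MatGetRedundantMatrix(), which
   dispatches here for MPIAIJ: every sub-communicator ends up with a complete copy
   of the global matrix.  nsubcomm, subcomm and mlocal_sub are assumed to have
   been set up by the application (for instance with the PetscSubcomm utilities);
   Cred is a name introduced for the example:

     Mat Cred;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/
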
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
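/*
   Illustrative call returning, for each local row, the entry of largest absolute
   value in v and its global column index in idx.  Assumes mlocal is the number of
   local rows; the idx argument may also be PETSC_NULL if the indices are not
   wanted:

     Vec      rmax;
     PetscInt *loc;
     ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
     ierr = PetscMalloc(mlocal*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
*/
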
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*119*/0,
2719        0,
2720        0,
2721        0,
2722        MatGetMultiProcBlock_MPIAIJ
2723 };
2724 
2725 /* ----------------------------------------------------------------------------------------*/
2726 
2727 EXTERN_C_BEGIN
2728 #undef __FUNCT__
2729 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2730 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2731 {
2732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2733   PetscErrorCode ierr;
2734 
2735   PetscFunctionBegin;
2736   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2737   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2738   PetscFunctionReturn(0);
2739 }
2740 EXTERN_C_END
2741 
2742 EXTERN_C_BEGIN
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2745 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 EXTERN_C_END
2756 
2757 EXTERN_C_BEGIN
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2761 {
2762   Mat_MPIAIJ     *b;
2763   PetscErrorCode ierr;
2764   PetscInt       i;
2765 
2766   PetscFunctionBegin;
2767   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2768   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2769   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2770   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2771 
2772   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2773   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2775   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2776   if (d_nnz) {
2777     for (i=0; i<B->rmap->n; i++) {
2778       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2779     }
2780   }
2781   if (o_nnz) {
2782     for (i=0; i<B->rmap->n; i++) {
2783       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2784     }
2785   }
2786   b = (Mat_MPIAIJ*)B->data;
2787 
2788   if (!B->preallocated) {
2789     /* Explicitly create 2 MATSEQAIJ matrices. */
2790     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2791     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2792     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2793     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2794     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2795     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2796     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2797     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2798   }
2799 
2800   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2801   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2802   B->preallocated = PETSC_TRUE;
2803   PetscFunctionReturn(0);
2804 }
2805 EXTERN_C_END
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2809 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2810 {
2811   Mat            mat;
2812   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   *newmat       = 0;
2817   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2818   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2819   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2820   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2821   a    = (Mat_MPIAIJ*)mat->data;
2822 
2823   mat->factortype    = matin->factortype;
2824   mat->rmap->bs      = matin->rmap->bs;
2825   mat->assembled    = PETSC_TRUE;
2826   mat->insertmode   = NOT_SET_VALUES;
2827   mat->preallocated = PETSC_TRUE;
2828 
2829   a->size           = oldmat->size;
2830   a->rank           = oldmat->rank;
2831   a->donotstash     = oldmat->donotstash;
2832   a->roworiented    = oldmat->roworiented;
2833   a->rowindices     = 0;
2834   a->rowvalues      = 0;
2835   a->getrowactive   = PETSC_FALSE;
2836 
2837   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2838   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2839 
2840   if (oldmat->colmap) {
2841 #if defined (PETSC_USE_CTABLE)
2842     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2843 #else
2844     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2845     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2846     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2847 #endif
2848   } else a->colmap = 0;
2849   if (oldmat->garray) {
2850     PetscInt len;
2851     len  = oldmat->B->cmap->n;
2852     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2853     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2854     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2855   } else a->garray = 0;
2856 
2857   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2859   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2860   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2861   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2862   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2863   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2864   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2865   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2866   *newmat = mat;
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 /*
2871     Allows sending/receiving messages larger than 2 gigabytes in a single call
2872 */
2873 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2874 {
2875   int             ierr;
2876   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2877   PetscInt        i,numchunks;
2878   PetscMPIInt     icnt;
2879 
2880   numchunks = cnt/CHUNKSIZE + 1;
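  /* every chunk except possibly the last holds CHUNKSIZE entries; the final chunk carries the remainder so each MPI count fits in a PetscMPIInt */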
2881   for (i=0; i<numchunks; i++) {
2882     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2883     ierr = MPI_Send(mess,icnt,type,to,tag,comm);
2884     if (type == MPIU_INT) {
2885       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2886     } else if (type == MPIU_SCALAR) {
2887       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2888     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2889   }
2890   return 0;
2891 }
2892 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2893 {
2894   int             ierr;
2895   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2896   MPI_Status      status;
2897   PetscInt        i,numchunks;
2898   PetscMPIInt     icnt;
2899 
2900   numchunks = cnt/CHUNKSIZE + 1;
2901   for (i=0; i<numchunks; i++) {
2902     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2903     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
2904     if (type == MPIU_INT) {
2905       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2906     } else if (type == MPIU_SCALAR) {
2907       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2908     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2909   }
2910   return 0;
2911 }
2912 
2913 #undef __FUNCT__
2914 #define __FUNCT__ "MatLoad_MPIAIJ"
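/*
   MatLoad_MPIAIJ - loads a parallel AIJ matrix from a binary viewer: process 0 reads
   the header, row lengths, column indices, and values and ships each piece to its
   owner with MPILong_Send(); every process then preallocates from the received
   lengths and inserts its local rows with MatSetValues_MPIAIJ().
*/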
2915 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2916 {
2917   PetscScalar    *vals,*svals;
2918   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2919   PetscErrorCode ierr;
2920   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2921   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2922   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2923   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2924   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2925   int            fd;
2926 
2927   PetscFunctionBegin;
2928   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2929   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2930   if (!rank) {
2931     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2932     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2933     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2934   }
2935 
2936   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2937 
2938   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2939   M = header[1]; N = header[2];
2940   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2941   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2942   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2943 
2944   /* If global sizes are set, check if they are consistent with that given in the file */
2945   if (sizesset) {
2946     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2947   }
2948   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2949   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2950 
2951   /* determine ownership of all rows */
2952   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2953   else m = newMat->rmap->n; /* Set by user */
2954 
2955   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2956   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2957 
2958   /* First process needs enough room for process with most rows */
2959   if (!rank) {
2960     mmax       = rowners[1];
2961     for (i=2; i<=size; i++) {   /* the gathered local row counts occupy rowners[1..size] */
2962       mmax = PetscMax(mmax,rowners[i]);
2963     }
2964   } else mmax = m;
2965 
2966   rowners[0] = 0;
2967   for (i=2; i<=size; i++) {
2968     rowners[i] += rowners[i-1];
2969   }
2970   rstart = rowners[rank];
2971   rend   = rowners[rank+1];
2972 
2973   /* distribute row lengths to all processors */
2974   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2975   if (!rank) {
2976     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2977     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* mmax so the buffer can hold any process's row lengths */
2978     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2979     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2980     for (j=0; j<m; j++) {
2981       procsnz[0] += ourlens[j];
2982     }
2983     for (i=1; i<size; i++) {
2984       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2985       /* calculate the number of nonzeros on each processor */
2986       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2987         procsnz[i] += rowlengths[j];
2988       }
2989       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2990     }
2991     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2992   } else {
2993     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2994   }
2995 
2996   if (!rank) {
2997     /* determine max buffer needed and allocate it */
2998     maxnz = 0;
2999     for (i=0; i<size; i++) {
3000       maxnz = PetscMax(maxnz,procsnz[i]);
3001     }
3002     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3003 
3004     /* read in my part of the matrix column indices  */
3005     nz   = procsnz[0];
3006     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3007     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3008 
3009     /* read in everyone else's column indices and ship them off */
3010     for (i=1; i<size; i++) {
3011       nz     = procsnz[i];
3012       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3013       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3014     }
3015     ierr = PetscFree(cols);CHKERRQ(ierr);
3016   } else {
3017     /* determine buffer space needed for message */
3018     nz = 0;
3019     for (i=0; i<m; i++) {
3020       nz += ourlens[i];
3021     }
3022     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3023 
3024     /* receive message of column indices*/
3025     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3026   }
3027 
3028   /* determine column ownership if matrix is not square */
3029   if (N != M) {
3030     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3031     else n = newMat->cmap->n;
3032     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3033     cstart = cend - n;
3034   } else {
3035     cstart = rstart;
3036     cend   = rend;
3037     n      = cend - cstart;
3038   }
3039 
3040   /* loop over local rows, determining number of off diagonal entries */
3041   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3042   jj = 0;
3043   for (i=0; i<m; i++) {
3044     for (j=0; j<ourlens[i]; j++) {
3045       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3046       jj++;
3047     }
3048   }
3049 
3050   for (i=0; i<m; i++) {
3051     ourlens[i] -= offlens[i];
3052   }
3053   if (!sizesset) {
3054     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3055   }
3056   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3057 
3058   for (i=0; i<m; i++) {
3059     ourlens[i] += offlens[i];
3060   }
3061 
3062   if (!rank) {
3063     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3064 
3065     /* read in my part of the matrix numerical values  */
3066     nz   = procsnz[0];
3067     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3068 
3069     /* insert into matrix */
3070     jj      = rstart;
3071     smycols = mycols;
3072     svals   = vals;
3073     for (i=0; i<m; i++) {
3074       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3075       smycols += ourlens[i];
3076       svals   += ourlens[i];
3077       jj++;
3078     }
3079 
3080     /* read in other processors and ship out */
3081     for (i=1; i<size; i++) {
3082       nz     = procsnz[i];
3083       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3084       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3085     }
3086     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3087   } else {
3088     /* receive numeric values */
3089     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3090 
3091     /* receive message of values*/
3092     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3093 
3094     /* insert into matrix */
3095     jj      = rstart;
3096     smycols = mycols;
3097     svals   = vals;
3098     for (i=0; i<m; i++) {
3099       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3100       smycols += ourlens[i];
3101       svals   += ourlens[i];
3102       jj++;
3103     }
3104   }
3105   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3106   ierr = PetscFree(vals);CHKERRQ(ierr);
3107   ierr = PetscFree(mycols);CHKERRQ(ierr);
3108   ierr = PetscFree(rowners);CHKERRQ(ierr);
3109 
3110   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3117 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3118 {
3119   PetscErrorCode ierr;
3120   IS             iscol_local;
3121   PetscInt       csize;
3122 
3123   PetscFunctionBegin;
3124   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3125   if (call == MAT_REUSE_MATRIX) {
3126     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3127     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3128   } else {
3129     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3130   }
3131   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3132   if (call == MAT_INITIAL_MATRIX) {
3133     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3134     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3135   }
3136   PetscFunctionReturn(0);
3137 }
3138 
3139 #undef __FUNCT__
3140 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3141 /*
3142     Not ideal since it makes two copies of the submatrix: first a local SeqAIJ
3143   copy, and then the final result built by concatenating the local matrices.
3144   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3145 
3146   Note: This requires a sequential iscol with all indices.
3147 */
3148 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3149 {
3150   PetscErrorCode ierr;
3151   PetscMPIInt    rank,size;
3152   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3153   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3154   Mat            *local,M,Mreuse;
3155   MatScalar      *vwork,*aa;
3156   MPI_Comm       comm = ((PetscObject)mat)->comm;
3157   Mat_SeqAIJ     *aij;
3158 
3159 
3160   PetscFunctionBegin;
3161   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3162   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3163 
3164   if (call ==  MAT_REUSE_MATRIX) {
3165     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3166     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3167     local = &Mreuse;
3168     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3169   } else {
3170     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3171     Mreuse = *local;
3172     ierr   = PetscFree(local);CHKERRQ(ierr);
3173   }
3174 
3175   /*
3176       m - number of local rows
3177       n - number of columns (same on all processors)
3178       rstart - first row in new global matrix generated
3179   */
3180   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3181   if (call == MAT_INITIAL_MATRIX) {
3182     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3183     ii  = aij->i;
3184     jj  = aij->j;
3185 
3186     /*
3187         Determine the number of non-zeros in the diagonal and off-diagonal
3188         portions of the matrix in order to do correct preallocation
3189     */
3190 
3191     /* first get start and end of "diagonal" columns */
3192     if (csize == PETSC_DECIDE) {
3193       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3194       if (mglobal == n) { /* square matrix */
3195 	nlocal = m;
3196       } else {
3197         nlocal = n/size + ((n % size) > rank);
3198       }
3199     } else {
3200       nlocal = csize;
3201     }
3202     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3203     rstart = rend - nlocal;
3204     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3205 
3206     /* next, compute all the lengths */
3207     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3208     olens = dlens + m;
3209     for (i=0; i<m; i++) {
3210       jend = ii[i+1] - ii[i];
3211       olen = 0;
3212       dlen = 0;
3213       for (j=0; j<jend; j++) {
3214         if (*jj < rstart || *jj >= rend) olen++;
3215         else dlen++;
3216         jj++;
3217       }
3218       olens[i] = olen;
3219       dlens[i] = dlen;
3220     }
3221     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3222     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3223     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3224     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3225     ierr = PetscFree(dlens);CHKERRQ(ierr);
3226   } else {
3227     PetscInt ml,nl;
3228 
3229     M = *newmat;
3230     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3231     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3232     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3233     /*
3234          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3235        rather than the slower MatSetValues().
3236     */
3237     M->was_assembled = PETSC_TRUE;
3238     M->assembled     = PETSC_FALSE;
3239   }
3240   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3241   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3242   ii  = aij->i;
3243   jj  = aij->j;
3244   aa  = aij->a;
3245   for (i=0; i<m; i++) {
3246     row   = rstart + i;
3247     nz    = ii[i+1] - ii[i];
3248     cwork = jj;     jj += nz;
3249     vwork = aa;     aa += nz;
3250     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3251   }
3252 
3253   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255   *newmat = M;
3256 
3257   /* save submatrix used in processor for next request */
3258   if (call ==  MAT_INITIAL_MATRIX) {
3259     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3260     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3261   }
3262 
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 EXTERN_C_BEGIN
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3269 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3270 {
3271   PetscInt       m,cstart, cend,j,nnz,i,d;
3272   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3273   const PetscInt *JJ;
3274   PetscScalar    *values;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3279 
3280   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3281   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3282   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3283   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3284   m      = B->rmap->n;
3285   cstart = B->cmap->rstart;
3286   cend   = B->cmap->rend;
3287   rstart = B->rmap->rstart;
3288 
3289   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3290 
3291 #if defined(PETSC_USE_DEBUGGING)
3292   for (i=0; i<m; i++) {
3293     nnz     = Ii[i+1]- Ii[i];
3294     JJ      = J + Ii[i];
3295     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3296     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3297     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3298   }
3299 #endif
3300 
3301   for (i=0; i<m; i++) {
3302     nnz     = Ii[i+1]- Ii[i];
3303     JJ      = J + Ii[i];
3304     nnz_max = PetscMax(nnz_max,nnz);
3305     d       = 0;
3306     for (j=0; j<nnz; j++) {
3307       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3308     }
3309     d_nnz[i] = d;
3310     o_nnz[i] = nnz - d;
3311   }
3312   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3313   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3314 
3315   if (v) values = (PetscScalar*)v;
3316   else {
3317     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3318     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3319   }
3320 
3321   for (i=0; i<m; i++) {
3322     ii   = i + rstart;
3323     nnz  = Ii[i+1]- Ii[i];
3324     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3325   }
3326   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328 
3329   if (!v) {
3330     ierr = PetscFree(values);CHKERRQ(ierr);
3331   }
3332   PetscFunctionReturn(0);
3333 }
3334 EXTERN_C_END
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3338 /*@
3339    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3340    (the default parallel PETSc format).
3341 
3342    Collective on MPI_Comm
3343 
3344    Input Parameters:
3345 +  B - the matrix
3346 .  i - the indices into j for the start of each local row (starts with zero)
3347 .  j - the column indices for each local row (starts with zero)
3348 -  v - optional values in the matrix
3349 
3350    Level: developer
3351 
3352    Notes:
3353        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3354      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3355      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3356 
3357        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3358 
3359        The format which is used for the sparse matrix input, is equivalent to a
3360     row-major ordering.. i.e for the following matrix, the input data expected is
3361     as shown:
3362 
3363         1 0 0
3364         2 0 3     P0
3365        -------
3366         4 5 6     P1
3367 
3368      Process0 [P0]: rows_owned=[0,1]
3369         i =  {0,1,3}  [size = nrow+1  = 2+1]
3370         j =  {0,0,2}  [size = local nz = 3]
3371         v =  {1,2,3}  [size = local nz = 3]
3372 
3373      Process1 [P1]: rows_owned=[2]
3374         i =  {0,3}    [size = nrow+1  = 1+1]
3375         j =  {0,1,2}  [size = local nz = 3]
3376         v =  {4,5,6}  [size = local nz = 3]
3377 
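       A minimal calling sketch for P0 above (variable names such as comm and B are
     illustrative only, and error checking follows the usual CHKERRQ() convention):
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
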
3378 .keywords: matrix, aij, compressed row, sparse, parallel
3379 
3380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3381           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3382 @*/
3383 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3384 {
3385   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3386 
3387   PetscFunctionBegin;
3388   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3389   if (f) {
3390     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3391   }
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 #undef __FUNCT__
3396 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3397 /*@C
3398    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3399    (the default parallel PETSc format).  For good matrix assembly performance
3400    the user should preallocate the matrix storage by setting the parameters
3401    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3402    performance can be increased by more than a factor of 50.
3403 
3404    Collective on MPI_Comm
3405 
3406    Input Parameters:
3407 +  A - the matrix
3408 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3409            (same value is used for all local rows)
3410 .  d_nnz - array containing the number of nonzeros in the various rows of the
3411            DIAGONAL portion of the local submatrix (possibly different for each row)
3412            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3413            The size of this array is equal to the number of local rows, i.e 'm'.
3414            You must leave room for the diagonal entry even if it is zero.
3415 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3416            submatrix (same value is used for all local rows).
3417 -  o_nnz - array containing the number of nonzeros in the various rows of the
3418            OFF-DIAGONAL portion of the local submatrix (possibly different for
3419            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3420            structure. The size of this array is equal to the number
3421            of local rows, i.e 'm'.
3422 
3423    If the *_nnz parameter is given then the *_nz parameter is ignored
3424 
3425    The AIJ format (also called the Yale sparse matrix format or
3426    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3427    storage.  The stored row and column indices begin with zero.
3428    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3429 
3430    The parallel matrix is partitioned such that the first m0 rows belong to
3431    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3432    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3433 
3434    The DIAGONAL portion of the local submatrix of a processor can be defined
3435    as the submatrix obtained by extracting the part corresponding to
3436    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3437    first row that belongs to the processor, r2 is the last row belonging to
3438    this processor, and c1-c2 is the range of indices of the local part of a
3439    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3440    common case of a square matrix, the row and column ranges are the same and
3441    the DIAGONAL part is also square. The remaining portion of the local
3442    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3443 
3444    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3445 
3446    You can call MatGetInfo() to get information on how effective the preallocation was;
3447    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3448    You can also run with the option -info and look for messages with the string
3449    malloc in them to see if additional memory allocation was needed.
3450 
3451    Example usage:
3452 
3453    Consider the following 8x8 matrix with 34 non-zero values, that is
3454    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3455    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3456    as follows:
3457 
3458 .vb
3459             1  2  0  |  0  3  0  |  0  4
3460     Proc0   0  5  6  |  7  0  0  |  8  0
3461             9  0 10  | 11  0  0  | 12  0
3462     -------------------------------------
3463            13  0 14  | 15 16 17  |  0  0
3464     Proc1   0 18  0  | 19 20 21  |  0  0
3465             0  0  0  | 22 23  0  | 24  0
3466     -------------------------------------
3467     Proc2  25 26 27  |  0  0 28  | 29  0
3468            30  0  0  | 31 32 33  |  0 34
3469 .ve
3470 
3471    This can be represented as a collection of submatrices as:
3472 
3473 .vb
3474       A B C
3475       D E F
3476       G H I
3477 .ve
3478 
3479    Where the submatrices A,B,C are owned by proc0, D,E,F are
3480    owned by proc1, G,H,I are owned by proc2.
3481 
3482    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3483    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3484    The 'M','N' parameters are 8,8, and have the same values on all procs.
3485 
3486    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3487    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3488    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3489    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3490    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3491    matrix, and [DF] as another SeqAIJ matrix.
3492 
3493    When d_nz, o_nz parameters are specified, d_nz storage elements are
3494    allocated for every row of the local diagonal submatrix, and o_nz
3495    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3496    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3497    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3498    In this case, the values of d_nz,o_nz are:
3499 .vb
3500      proc0 : d_nz = 2, o_nz = 2
3501      proc1 : d_nz = 3, o_nz = 2
3502      proc2 : d_nz = 1, o_nz = 4
3503 .ve
3504    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3505    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3506    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3507    34 values.
3508 
3509    When d_nnz, o_nnz parameters are specified, the storage is specified
3510    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3511    In the above case the values for d_nnz,o_nnz are:
3512 .vb
3513      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3514      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3515      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3516 .ve
3517    Here the space allocated is the sum of all the above values, i.e. 34, and
3518    hence the preallocation is exact.
3519 
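   For the layout above, a sketch of the corresponding call on proc1 (the arrays
   are exactly those listed; the matrix name A is illustrative only):
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
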
3520    Level: intermediate
3521 
3522 .keywords: matrix, aij, compressed row, sparse, parallel
3523 
3524 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3525           MPIAIJ, MatGetInfo()
3526 @*/
3527 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3528 {
3529   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3530 
3531   PetscFunctionBegin;
3532   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3533   if (f) {
3534     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3535   }
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3541 /*@
3542      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3543          CSR format the local rows.
3544 
3545    Collective on MPI_Comm
3546 
3547    Input Parameters:
3548 +  comm - MPI communicator
3549 .  m - number of local rows (Cannot be PETSC_DECIDE)
3550 .  n - This value should be the same as the local size used in creating the
3551        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3552        calculated if N is given) For square matrices n is almost always m.
3553 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3554 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3555 .   i - row indices
3556 .   j - column indices
3557 -   a - matrix values
3558 
3559    Output Parameter:
3560 .   mat - the matrix
3561 
3562    Level: intermediate
3563 
3564    Notes:
3565        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3566      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3567      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3568 
3569        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3570 
3571        The format used for the sparse matrix input is equivalent to a
3572     row-major ordering, i.e. for the following matrix, the input data expected is
3573     as shown:
3574 
3575         1 0 0
3576         2 0 3     P0
3577        -------
3578         4 5 6     P1
3579 
3580      Process0 [P0]: rows_owned=[0,1]
3581         i =  {0,1,3}  [size = nrow+1  = 2+1]
3582         j =  {0,0,2}  [size = local nz = 3]
3583         v =  {1,2,3}  [size = local nz = 3]
3584 
3585      Process1 [P1]: rows_owned=[2]
3586         i =  {0,3}    [size = nrow+1  = 1+1]
3587         j =  {0,1,2}  [size = local nz = 3]
3588         v =  {4,5,6}  [size = local nz = 3]
3589 
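     A minimal calling sketch for P0 above (comm and the array names are
     illustrative only):
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
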
3590 .keywords: matrix, aij, compressed row, sparse, parallel
3591 
3592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3593           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3594 @*/
3595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3596 {
3597   PetscErrorCode ierr;
3598 
3599   PetscFunctionBegin;
3600   if (i[0]) {
3601     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3602   }
3603   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3604   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3605   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3606   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3607   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3608   PetscFunctionReturn(0);
3609 }
3610 
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatCreateMPIAIJ"
3613 /*@C
3614    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3615    (the default parallel PETSc format).  For good matrix assembly performance
3616    the user should preallocate the matrix storage by setting the parameters
3617    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3618    performance can be increased by more than a factor of 50.
3619 
3620    Collective on MPI_Comm
3621 
3622    Input Parameters:
3623 +  comm - MPI communicator
3624 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3625            This value should be the same as the local size used in creating the
3626            y vector for the matrix-vector product y = Ax.
3627 .  n - This value should be the same as the local size used in creating the
3628        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3629        calculated if N is given) For square matrices n is almost always m.
3630 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3631 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3632 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3633            (same value is used for all local rows)
3634 .  d_nnz - array containing the number of nonzeros in the various rows of the
3635            DIAGONAL portion of the local submatrix (possibly different for each row)
3636            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3637            The size of this array is equal to the number of local rows, i.e 'm'.
3638            You must leave room for the diagonal entry even if it is zero.
3639 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3640            submatrix (same value is used for all local rows).
3641 -  o_nnz - array containing the number of nonzeros in the various rows of the
3642            OFF-DIAGONAL portion of the local submatrix (possibly different for
3643            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3644            structure. The size of this array is equal to the number
3645            of local rows, i.e 'm'.
3646 
3647    Output Parameter:
3648 .  A - the matrix
3649 
3650    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3651    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3652    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3653 
3654    Notes:
3655    If the *_nnz parameter is given then the *_nz parameter is ignored
3656 
3657    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3658    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3659    storage requirements for this matrix.
3660 
3661    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3662    processor then it must be used on all processors that share the object for
3663    that argument.
3664 
3665    The user MUST specify either the local or global matrix dimensions
3666    (possibly both).
3667 
3668    The parallel matrix is partitioned across processors such that the
3669    first m0 rows belong to process 0, the next m1 rows belong to
3670    process 1, the next m2 rows belong to process 2 etc.. where
3671    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3672    values corresponding to [m x N] submatrix.
3673 
3674    The columns are logically partitioned with the n0 columns belonging
3675    to 0th partition, the next n1 columns belonging to the next
3676    partition etc., where n0,n1,n2,... are the input parameter 'n'.
3677 
3678    The DIAGONAL portion of the local submatrix on any given processor
3679    is the submatrix corresponding to the rows and columns m,n
3680    owned by the given processor, i.e. the diagonal matrix on
3681    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3682    etc. The remaining portion of the local submatrix [m x (N-n)]
3683    constitute the OFF-DIAGONAL portion. The example below better
3684    illustrates this concept.
3685 
3686    For a square global matrix we define each processor's diagonal portion
3687    to be its local rows and the corresponding columns (a square submatrix);
3688    each processor's off-diagonal portion encompasses the remainder of the
3689    local matrix (a rectangular submatrix).
3690 
3691    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3692 
3693    When calling this routine with a single process communicator, a matrix of
3694    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3695    type of communicator, use the construction mechanism:
3696      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3697 
3698    By default, this format uses inodes (identical nodes) when possible.
3699    We search for consecutive rows with the same nonzero structure, thereby
3700    reusing matrix information to achieve increased efficiency.
3701 
3702    Options Database Keys:
3703 +  -mat_no_inode  - Do not use inodes
3704 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3705 -  -mat_aij_oneindex - Internally use indexing starting at 1
3706         rather than 0.  Note that when calling MatSetValues(),
3707         the user still MUST index entries starting at 0!
3708 
3709 
3710    Example usage:
3711 
3712    Consider the following 8x8 matrix with 34 non-zero values, that is
3713    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3714    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3715    as follows:
3716 
3717 .vb
3718             1  2  0  |  0  3  0  |  0  4
3719     Proc0   0  5  6  |  7  0  0  |  8  0
3720             9  0 10  | 11  0  0  | 12  0
3721     -------------------------------------
3722            13  0 14  | 15 16 17  |  0  0
3723     Proc1   0 18  0  | 19 20 21  |  0  0
3724             0  0  0  | 22 23  0  | 24  0
3725     -------------------------------------
3726     Proc2  25 26 27  |  0  0 28  | 29  0
3727            30  0  0  | 31 32 33  |  0 34
3728 .ve
3729 
3730    This can be represented as a collection of submatrices as:
3731 
3732 .vb
3733       A B C
3734       D E F
3735       G H I
3736 .ve
3737 
3738    Where the submatrices A,B,C are owned by proc0, D,E,F are
3739    owned by proc1, G,H,I are owned by proc2.
3740 
3741    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3742    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3743    The 'M','N' parameters are 8,8, and have the same values on all procs.
3744 
3745    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3746    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3747    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3748    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3749    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3750    matrix, and [DF] as another SeqAIJ matrix.
3751 
3752    When d_nz, o_nz parameters are specified, d_nz storage elements are
3753    allocated for every row of the local diagonal submatrix, and o_nz
3754    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3755    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3756    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3757    In this case, the values of d_nz,o_nz are:
3758 .vb
3759      proc0 : d_nz = 2, o_nz = 2
3760      proc1 : d_nz = 3, o_nz = 2
3761      proc2 : d_nz = 1, o_nz = 4
3762 .ve
3763    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3764    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3765    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3766    34 values.
3767 
3768    When d_nnz, o_nnz parameters are specified, the storage is specified
3769    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3770    In the above case the values for d_nnz,o_nnz are:
3771 .vb
3772      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3773      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3774      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3775 .ve
3776    Here the space allocated is the sum of all the above values, i.e. 34, and
3777    hence the preallocation is exact.
3778 
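   A minimal calling sketch (illustrative only; m,n,M,N,d_nz,o_nz stand for the
   arguments described above):
.vb
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
   or, following the recommended MatCreate()/MatSetType() paradigm:
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
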
3779    Level: intermediate
3780 
3781 .keywords: matrix, aij, compressed row, sparse, parallel
3782 
3783 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3784           MPIAIJ, MatCreateMPIAIJWithArrays()
3785 @*/
3786 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3787 {
3788   PetscErrorCode ierr;
3789   PetscMPIInt    size;
3790 
3791   PetscFunctionBegin;
3792   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3793   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3794   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3795   if (size > 1) {
3796     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3797     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3798   } else {
3799     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3800     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3801   }
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
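/*
   MatMPIAIJGetSeqAIJ - returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks
   stored inside an MPIAIJ matrix, together with the array (colmap) giving the global
   column number of each compressed column of Ao.
*/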
3807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3808 {
3809   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3810 
3811   PetscFunctionBegin;
3812   *Ad     = a->A;
3813   *Ao     = a->B;
3814   *colmap = a->garray;
3815   PetscFunctionReturn(0);
3816 }
3817 
3818 #undef __FUNCT__
3819 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3820 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3821 {
3822   PetscErrorCode ierr;
3823   PetscInt       i;
3824   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3825 
3826   PetscFunctionBegin;
3827   if (coloring->ctype == IS_COLORING_GLOBAL) {
3828     ISColoringValue *allcolors,*colors;
3829     ISColoring      ocoloring;
3830 
3831     /* set coloring for diagonal portion */
3832     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3833 
3834     /* set coloring for off-diagonal portion */
3835     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3836     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3837     for (i=0; i<a->B->cmap->n; i++) {
3838       colors[i] = allcolors[a->garray[i]];
3839     }
3840     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3841     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3842     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3843     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3844   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3845     ISColoringValue *colors;
3846     PetscInt        *larray;
3847     ISColoring      ocoloring;
3848 
3849     /* set coloring for diagonal portion */
3850     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3851     for (i=0; i<a->A->cmap->n; i++) {
3852       larray[i] = i + A->cmap->rstart;
3853     }
3854     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3855     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3856     for (i=0; i<a->A->cmap->n; i++) {
3857       colors[i] = coloring->colors[larray[i]];
3858     }
3859     ierr = PetscFree(larray);CHKERRQ(ierr);
3860     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3861     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3862     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3863 
3864     /* set coloring for off-diagonal portion */
3865     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3866     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3867     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3868     for (i=0; i<a->B->cmap->n; i++) {
3869       colors[i] = coloring->colors[larray[i]];
3870     }
3871     ierr = PetscFree(larray);CHKERRQ(ierr);
3872     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3873     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3874     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3875   } else {
3876     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3877   }
3878 
3879   PetscFunctionReturn(0);
3880 }
3881 
3882 #if defined(PETSC_HAVE_ADIC)
3883 #undef __FUNCT__
3884 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3885 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3886 {
3887   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3888   PetscErrorCode ierr;
3889 
3890   PetscFunctionBegin;
3891   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3892   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3893   PetscFunctionReturn(0);
3894 }
3895 #endif
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3899 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3900 {
3901   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3902   PetscErrorCode ierr;
3903 
3904   PetscFunctionBegin;
3905   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3906   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3907   PetscFunctionReturn(0);
3908 }
3909 
3910 #undef __FUNCT__
3911 #define __FUNCT__ "MatMerge"
3912 /*@
3913       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3914                  matrices from each processor
3915 
3916     Collective on MPI_Comm
3917 
3918    Input Parameters:
3919 +    comm - the communicator the parallel matrix will live on
3920 .    inmat - the input sequential matrices
3921 .    n - number of local columns (or PETSC_DECIDE)
3922 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3923 
3924    Output Parameter:
3925 .    outmat - the parallel matrix generated
3926 
3927     Level: advanced
3928 
3929    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3930 
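   This routine destroys inmat once its rows have been copied into outmat.

   A sketch of typical use, where each process has already assembled localA as a
   SeqAIJ matrix with the same number of columns (names are illustrative only):
.vb
     ierr = MatMerge(comm,localA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
.ve
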
3931 @*/
3932 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3933 {
3934   PetscErrorCode ierr;
3935   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3936   PetscInt       *indx;
3937   PetscScalar    *values;
3938 
3939   PetscFunctionBegin;
3940   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3941   if (scall == MAT_INITIAL_MATRIX){
3942     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3943     if (n == PETSC_DECIDE){
3944       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3945     }
3946     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3947     rstart -= m;
3948 
3949     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3950     for (i=0;i<m;i++) {
3951       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3952       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3953       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3954     }
3955     /* This routine will ONLY return MPIAIJ type matrix */
3956     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3957     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3958     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3959     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3960     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3961 
3962   } else if (scall == MAT_REUSE_MATRIX){
3963     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3964   } else {
3965     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3966   }
3967 
3968   for (i=0;i<m;i++) {
3969     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3970     Ii    = i + rstart;
3971     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3972     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3973   }
3974   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3975   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3977 
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 #undef __FUNCT__
3982 #define __FUNCT__ "MatFileSplit"
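/*
   MatFileSplit - writes (appends) each process's local rows of A as a sequential
   matrix to its own binary file named <outfile>.<rank>.
*/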
3983 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3984 {
3985   PetscErrorCode    ierr;
3986   PetscMPIInt       rank;
3987   PetscInt          m,N,i,rstart,nnz;
3988   size_t            len;
3989   const PetscInt    *indx;
3990   PetscViewer       out;
3991   char              *name;
3992   Mat               B;
3993   const PetscScalar *values;
3994 
3995   PetscFunctionBegin;
3996   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3997   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3998   /* Should this be the type of the diagonal block of A? */
3999   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4000   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4001   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4002   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4003   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4004   for (i=0;i<m;i++) {
4005     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4006     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4007     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4008   }
4009   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4010   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4011 
4012   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4013   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4014   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4015   sprintf(name,"%s.%d",outfile,rank);
4016   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4017   ierr = PetscFree(name);
4018   ierr = MatView(B,out);CHKERRQ(ierr);
4019   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4020   ierr = MatDestroy(B);CHKERRQ(ierr);
4021   PetscFunctionReturn(0);
4022 }
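/*
   Usage sketch for MatFileSplit() (illustrative only; A is an assembled parallel matrix and the
   file name is hypothetical). Each process writes its local rows to a separate binary file,
   outfile.<rank>:

     ierr = MatFileSplit(A,"splitmat");CHKERRQ(ierr);
     ... produces splitmat.0, splitmat.1, ... readable with PetscViewerBinaryOpen()/MatLoad ...
*/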
4023 
4024 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4025 #undef __FUNCT__
4026 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4027 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4028 {
4029   PetscErrorCode       ierr;
4030   Mat_Merge_SeqsToMPI  *merge;
4031   PetscContainer       container;
4032 
4033   PetscFunctionBegin;
4034   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4035   if (container) {
4036     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4037     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4038     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4039     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4040     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4041     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4042     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4043     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4044     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4045     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4046     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4047     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4048     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4049     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4050 
4051     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4052     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4053     ierr = PetscFree(merge);CHKERRQ(ierr);
4054   }
4055 
4056   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4057   PetscFunctionReturn(0);
4058 }
4059 
4060 #include "../src/mat/utils/freespace.h"
4061 #include "petscbt.h"
4062 
4063 #undef __FUNCT__
4064 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4065 /*@C
4066       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4067                  matrices from each process
4068 
4069     Collective on MPI_Comm
4070 
4071    Input Parameters:
4072 +    comm - the communicator the parallel matrix will live on
4073 .    seqmat - the input sequential matrix (one per process)
4074 .    m - number of local rows (or PETSC_DECIDE)
4075 .    n - number of local columns (or PETSC_DECIDE)
4076 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4077 
4078    Output Parameter:
4079 .    mpimat - the parallel matrix generated
4080 
4081     Level: advanced
4082 
4083    Notes:
4084      The dimensions of the sequential matrix on each process MUST be the same.
4085      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
4086      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4087 @*/
4088 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4089 {
4090   PetscErrorCode       ierr;
4091   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4092   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4093   PetscMPIInt          size,rank,taga,*len_s;
4094   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4095   PetscInt             proc,m;
4096   PetscInt             **buf_ri,**buf_rj;
4097   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4098   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4099   MPI_Request          *s_waits,*r_waits;
4100   MPI_Status           *status;
4101   MatScalar            *aa=a->a;
4102   MatScalar            **abuf_r,*ba_i;
4103   Mat_Merge_SeqsToMPI  *merge;
4104   PetscContainer       container;
4105 
4106   PetscFunctionBegin;
4107   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4108 
4109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4110   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4111 
4112   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4113   if (container) {
4114     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4115   }
4116   bi     = merge->bi;
4117   bj     = merge->bj;
4118   buf_ri = merge->buf_ri;
4119   buf_rj = merge->buf_rj;
4120 
4121   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4122   owners = merge->rowmap->range;
4123   len_s  = merge->len_s;
4124 
4125   /* send and recv matrix values */
4126   /*-----------------------------*/
4127   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4128   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4129 
4130   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4131   for (proc=0,k=0; proc<size; proc++){
4132     if (!len_s[proc]) continue;
4133     i = owners[proc];
4134     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4135     k++;
4136   }
4137 
4138   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4139   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4140   ierr = PetscFree(status);CHKERRQ(ierr);
4141 
4142   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4143   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4144 
4145   /* insert mat values of mpimat */
4146   /*----------------------------*/
4147   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4148   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4149 
4150   for (k=0; k<merge->nrecv; k++){
4151     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4152     nrows = *(buf_ri_k[k]);
4153     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4154     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4155   }
4156 
4157   /* set values of ba */
4158   m = merge->rowmap->n;
4159   for (i=0; i<m; i++) {
4160     arow = owners[rank] + i;
4161     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4162     bnzi = bi[i+1] - bi[i];
4163     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4164 
4165     /* add local non-zero vals of this proc's seqmat into ba */
4166     anzi = ai[arow+1] - ai[arow];
4167     aj   = a->j + ai[arow];
4168     aa   = a->a + ai[arow];
4169     nextaj = 0;
4170     for (j=0; nextaj<anzi; j++){
4171       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4172         ba_i[j] += aa[nextaj++];
4173       }
4174     }
4175 
4176     /* add received vals into ba */
4177     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4178       /* i-th row */
4179       if (i == *nextrow[k]) {
4180         anzi = *(nextai[k]+1) - *nextai[k];
4181         aj   = buf_rj[k] + *(nextai[k]);
4182         aa   = abuf_r[k] + *(nextai[k]);
4183         nextaj = 0;
4184         for (j=0; nextaj<anzi; j++){
4185           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4186             ba_i[j] += aa[nextaj++];
4187           }
4188         }
4189         nextrow[k]++; nextai[k]++;
4190       }
4191     }
4192     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4193   }
4194   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4195   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4196 
4197   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4198   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4199   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4200   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4201   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4202   PetscFunctionReturn(0);
4203 }
4204 
4205 #undef __FUNCT__
4206 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4207 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4208 {
4209   PetscErrorCode       ierr;
4210   Mat                  B_mpi;
4211   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4212   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4213   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4214   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4215   PetscInt             len,proc,*dnz,*onz;
4216   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4217   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4218   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4219   MPI_Status           *status;
4220   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4221   PetscBT              lnkbt;
4222   Mat_Merge_SeqsToMPI  *merge;
4223   PetscContainer       container;
4224 
4225   PetscFunctionBegin;
4226   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4227 
4228   /* make sure it is a PETSc comm */
4229   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4230   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4231   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4232 
4233   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4234   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4235 
4236   /* determine row ownership */
4237   /*---------------------------------------------------------*/
4238   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4239   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4240   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4241   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4242   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4243   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4244   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4245 
4246   m      = merge->rowmap->n;
4247   M      = merge->rowmap->N;
4248   owners = merge->rowmap->range;
4249 
4250   /* determine the number of messages to send, their lengths */
4251   /*---------------------------------------------------------*/
4252   len_s  = merge->len_s;
4253 
4254   len = 0;  /* length of buf_si[] */
4255   merge->nsend = 0;
4256   for (proc=0; proc<size; proc++){
4257     len_si[proc] = 0;
4258     if (proc == rank){
4259       len_s[proc] = 0;
4260     } else {
4261       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4262       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4263     }
4264     if (len_s[proc]) {
4265       merge->nsend++;
4266       nrows = 0;
4267       for (i=owners[proc]; i<owners[proc+1]; i++){
4268         if (ai[i+1] > ai[i]) nrows++;
4269       }
4270       len_si[proc] = 2*(nrows+1);
4271       len += len_si[proc];
4272     }
4273   }
4274 
4275   /* determine the number and length of messages to receive for ij-structure */
4276   /*-------------------------------------------------------------------------*/
4277   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4278   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4279 
4280   /* post the Irecv of j-structure */
4281   /*-------------------------------*/
4282   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4283   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4284 
4285   /* post the Isend of j-structure */
4286   /*--------------------------------*/
4287   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4288 
4289   for (proc=0, k=0; proc<size; proc++){
4290     if (!len_s[proc]) continue;
4291     i = owners[proc];
4292     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4293     k++;
4294   }
4295 
4296   /* receives and sends of j-structure are complete */
4297   /*------------------------------------------------*/
4298   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4299   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4300 
4301   /* send and recv i-structure */
4302   /*---------------------------*/
4303   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4304   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4305 
4306   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4307   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4308   for (proc=0,k=0; proc<size; proc++){
4309     if (!len_s[proc]) continue;
4310     /* form outgoing message for i-structure:
4311          buf_si[0]:                 nrows to be sent
4312                [1:nrows]:           row index (local to the receiving process)
4313                [nrows+1:2*nrows+1]: i-structure index
4314     */
4315     /*-------------------------------------------*/
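    /* Worked example of the layout above (illustrative, with hypothetical data): if only local
       rows 0 and 2 of the destination process are nonempty, with 3 and 2 nonzeros respectively,
       then nrows = 2 and the outgoing message is
           buf_si = {2, 0, 2, 0, 3, 5}
       i.e. the row count, the two local row indices, and the i-structure offsets {0,3,5}. */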
4316     nrows = len_si[proc]/2 - 1;
4317     buf_si_i    = buf_si + nrows+1;
4318     buf_si[0]   = nrows;
4319     buf_si_i[0] = 0;
4320     nrows = 0;
4321     for (i=owners[proc]; i<owners[proc+1]; i++){
4322       anzi = ai[i+1] - ai[i];
4323       if (anzi) {
4324         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4325         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4326         nrows++;
4327       }
4328     }
4329     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4330     k++;
4331     buf_si += len_si[proc];
4332   }
4333 
4334   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4335   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4336 
4337   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4338   for (i=0; i<merge->nrecv; i++){
4339     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4340   }
4341 
4342   ierr = PetscFree(len_si);CHKERRQ(ierr);
4343   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4344   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4345   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4346   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4347   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4348   ierr = PetscFree(status);CHKERRQ(ierr);
4349 
4350   /* compute a local seq matrix in each processor */
4351   /*----------------------------------------------*/
4352   /* allocate bi array and free space for accumulating nonzero column info */
4353   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4354   bi[0] = 0;
4355 
4356   /* create and initialize a linked list */
4357   nlnk = N+1;
4358   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4359 
4360   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4361   len = 0;
4362   len  = ai[owners[rank+1]] - ai[owners[rank]];
4363   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4364   current_space = free_space;
4365 
4366   /* determine symbolic info for each local row */
4367   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4368 
4369   for (k=0; k<merge->nrecv; k++){
4370     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4371     nrows = *buf_ri_k[k];
4372     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4373     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4374   }
4375 
4376   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4377   len = 0;
4378   for (i=0;i<m;i++) {
4379     bnzi   = 0;
4380     /* add local non-zero cols of this proc's seqmat into lnk */
4381     arow   = owners[rank] + i;
4382     anzi   = ai[arow+1] - ai[arow];
4383     aj     = a->j + ai[arow];
4384     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4385     bnzi += nlnk;
4386     /* add received col data into lnk */
4387     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4388       if (i == *nextrow[k]) { /* i-th row */
4389         anzi = *(nextai[k]+1) - *nextai[k];
4390         aj   = buf_rj[k] + *nextai[k];
4391         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4392         bnzi += nlnk;
4393         nextrow[k]++; nextai[k]++;
4394       }
4395     }
4396     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4397 
4398     /* if free space is not available, make more free space */
4399     if (current_space->local_remaining<bnzi) {
4400       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4401       nspacedouble++;
4402     }
4403     /* copy data into free space, then initialize lnk */
4404     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4405     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4406 
4407     current_space->array           += bnzi;
4408     current_space->local_used      += bnzi;
4409     current_space->local_remaining -= bnzi;
4410 
4411     bi[i+1] = bi[i] + bnzi;
4412   }
4413 
4414   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4415 
4416   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4417   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4418   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4419 
4420   /* create symbolic parallel matrix B_mpi */
4421   /*---------------------------------------*/
4422   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4423   if (n==PETSC_DECIDE) {
4424     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4425   } else {
4426     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4427   }
4428   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4429   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4430   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4431 
4432   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4433   B_mpi->assembled     = PETSC_FALSE;
4434   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4435   merge->bi            = bi;
4436   merge->bj            = bj;
4437   merge->buf_ri        = buf_ri;
4438   merge->buf_rj        = buf_rj;
4439   merge->coi           = PETSC_NULL;
4440   merge->coj           = PETSC_NULL;
4441   merge->owners_co     = PETSC_NULL;
4442 
4443   /* attach the supporting struct to B_mpi for reuse */
4444   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4445   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4446   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4447   *mpimat = B_mpi;
4448 
4449   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4450   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 #undef __FUNCT__
4455 #define __FUNCT__ "MatMerge_SeqsToMPI"
4456 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4457 {
4458   PetscErrorCode   ierr;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4462   if (scall == MAT_INITIAL_MATRIX){
4463     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4464   }
4465   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4466   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4467   PetscFunctionReturn(0);
4468 }
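/*
   Usage sketch for MatMerge_SeqsToMPI() (illustrative only; the sequential matrix seq and its
   sizes are hypothetical). Every process contributes a SeqAIJ matrix of the SAME global
   dimensions; the contributions are summed into one MPIAIJ matrix. The PETSC_DECIDE arguments
   follow the local-size options documented above:

     Mat            seq,mpi;
     PetscErrorCode ierr;

     ... build and assemble the M by N SeqAIJ matrix seq on each process ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi);CHKERRQ(ierr);
     ... later, after changing the values (but not the nonzero pattern) of seq ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpi);CHKERRQ(ierr);
*/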
4469 
4470 #undef __FUNCT__
4471 #define __FUNCT__ "MatGetLocalMat"
4472 /*@
4473      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of the parallel matrix
4474 
4475     Not Collective
4476 
4477    Input Parameters:
4478 +    A - the matrix
4479 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4480 
4481    Output Parameter:
4482 .    A_loc - the local sequential matrix generated
4483 
4484     Level: developer
4485 
4486 @*/
4487 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4488 {
4489   PetscErrorCode  ierr;
4490   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4491   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4492   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4493   MatScalar       *aa=a->a,*ba=b->a,*cam;
4494   PetscScalar     *ca;
4495   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4496   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4497 
4498   PetscFunctionBegin;
4499   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4500   if (scall == MAT_INITIAL_MATRIX){
4501     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4502     ci[0] = 0;
4503     for (i=0; i<am; i++){
4504       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4505     }
4506     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4507     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4508     k = 0;
4509     for (i=0; i<am; i++) {
4510       ncols_o = bi[i+1] - bi[i];
4511       ncols_d = ai[i+1] - ai[i];
4512       /* off-diagonal portion of A */
4513       for (jo=0; jo<ncols_o; jo++) {
4514         col = cmap[*bj];
4515         if (col >= cstart) break;
4516         cj[k]   = col; bj++;
4517         ca[k++] = *ba++;
4518       }
4519       /* diagonal portion of A */
4520       for (j=0; j<ncols_d; j++) {
4521         cj[k]   = cstart + *aj++;
4522         ca[k++] = *aa++;
4523       }
4524       /* off-diagonal portion of A */
4525       for (j=jo; j<ncols_o; j++) {
4526         cj[k]   = cmap[*bj++];
4527         ca[k++] = *ba++;
4528       }
4529     }
4530     /* put together the new matrix */
4531     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4532     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4533     /* Since these are PETSc arrays, change flags to free them as necessary. */
4534     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4535     mat->free_a  = PETSC_TRUE;
4536     mat->free_ij = PETSC_TRUE;
4537     mat->nonew   = 0;
4538   } else if (scall == MAT_REUSE_MATRIX){
4539     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4540     ci = mat->i; cj = mat->j; cam = mat->a;
4541     for (i=0; i<am; i++) {
4542       /* off-diagonal portion of A */
4543       ncols_o = bi[i+1] - bi[i];
4544       for (jo=0; jo<ncols_o; jo++) {
4545         col = cmap[*bj];
4546         if (col >= cstart) break;
4547         *cam++ = *ba++; bj++;
4548       }
4549       /* diagonal portion of A */
4550       ncols_d = ai[i+1] - ai[i];
4551       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4552       /* off-diagonal portion of A */
4553       for (j=jo; j<ncols_o; j++) {
4554         *cam++ = *ba++; bj++;
4555       }
4556     }
4557   } else {
4558     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4559   }
4560 
4561   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4562   PetscFunctionReturn(0);
4563 }
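/*
   Usage sketch for MatGetLocalMat() (illustrative only; A is a hypothetical assembled MPIAIJ
   matrix):

     Mat            Aloc;
     PetscErrorCode ierr;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... use the local rows of A through the SeqAIJ matrix Aloc ...
     ... after the numerical values of A change, refresh Aloc in place ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/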
4564 
4565 #undef __FUNCT__
4566 #define __FUNCT__ "MatGetLocalMatCondensed"
4567 /*@C
4568      MatGetLocalMatCondensed - Creates a SeqAIJ matrix that contains the local rows of the parallel matrix, restricted to its NON-ZERO columns
4569 
4570     Not Collective
4571 
4572    Input Parameters:
4573 +    A - the matrix
4574 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4575 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4576 
4577    Output Parameter:
4578 .    A_loc - the local sequential matrix generated
4579 
4580     Level: developer
4581 
4582 @*/
4583 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4584 {
4585   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4586   PetscErrorCode    ierr;
4587   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4588   IS                isrowa,iscola;
4589   Mat               *aloc;
4590 
4591   PetscFunctionBegin;
4592   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4593   if (!row){
4594     start = A->rmap->rstart; end = A->rmap->rend;
4595     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4596   } else {
4597     isrowa = *row;
4598   }
4599   if (!col){
4600     start = A->cmap->rstart;
4601     cmap  = a->garray;
4602     nzA   = a->A->cmap->n;
4603     nzB   = a->B->cmap->n;
4604     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4605     ncols = 0;
4606     for (i=0; i<nzB; i++) {
4607       if (cmap[i] < start) idx[ncols++] = cmap[i];
4608       else break;
4609     }
4610     imark = i;
4611     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4612     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4613     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4614     ierr = PetscFree(idx);CHKERRQ(ierr);
4615   } else {
4616     iscola = *col;
4617   }
4618   if (scall != MAT_INITIAL_MATRIX){
4619     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4620     aloc[0] = *A_loc;
4621   }
4622   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4623   *A_loc = aloc[0];
4624   ierr = PetscFree(aloc);CHKERRQ(ierr);
4625   if (!row){
4626     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4627   }
4628   if (!col){
4629     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4630   }
4631   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4632   PetscFunctionReturn(0);
4633 }
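/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative only; A is a hypothetical MPIAIJ
   matrix). Passing PETSC_NULL for the row and column index sets selects the local rows and the
   nonzero columns automatically:

     Mat            Aloc;
     PetscErrorCode ierr;

     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/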
4634 
4635 #undef __FUNCT__
4636 #define __FUNCT__ "MatGetBrowsOfAcols"
4637 /*@C
4638     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
4639 
4640     Collective on Mat
4641 
4642    Input Parameters:
4643 +    A,B - the matrices in mpiaij format
4644 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4645 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4646 
4647    Output Parameters:
4648 +    rowb, colb - index sets of rows and columns of B to extract
4649 .    brstart - row index within B_seq at which B's own local rows (the next B->rmap->n rows) begin
4650 -    B_seq - the sequential matrix generated
4651 
4652     Level: developer
4653 
4654 @*/
4655 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4656 {
4657   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4658   PetscErrorCode    ierr;
4659   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4660   IS                isrowb,iscolb;
4661   Mat               *bseq;
4662 
4663   PetscFunctionBegin;
4664   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4665     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4666   }
4667   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4668 
4669   if (scall == MAT_INITIAL_MATRIX){
4670     start = A->cmap->rstart;
4671     cmap  = a->garray;
4672     nzA   = a->A->cmap->n;
4673     nzB   = a->B->cmap->n;
4674     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4675     ncols = 0;
4676     for (i=0; i<nzB; i++) {  /* rows of B below this process's ownership range */
4677       if (cmap[i] < start) idx[ncols++] = cmap[i];
4678       else break;
4679     }
4680     imark = i;
4681     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4682     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above this process's ownership range */
4683     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4684     ierr = PetscFree(idx);CHKERRQ(ierr);
4685     *brstart = imark;
4686     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4687   } else {
4688     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4689     isrowb = *rowb; iscolb = *colb;
4690     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4691     bseq[0] = *B_seq;
4692   }
4693   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4694   *B_seq = bseq[0];
4695   ierr = PetscFree(bseq);CHKERRQ(ierr);
4696   if (!rowb){
4697     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4698   } else {
4699     *rowb = isrowb;
4700   }
4701   if (!colb){
4702     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4703   } else {
4704     *colb = iscolb;
4705   }
4706   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4707   PetscFunctionReturn(0);
4708 }
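/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only; A and B are hypothetical MPIAIJ
   matrices whose layouts satisfy the compatibility check above). The index sets created on the
   first call are kept and passed back in on subsequent calls:

     Mat            Bseq;
     IS             rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt       brstart;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/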
4709 
4710 #undef __FUNCT__
4711 #define __FUNCT__ "MatGetBrowsOfAoCols"
4712 /*@C
4713     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4714     of the OFF-DIAGONAL portion of local A
4715 
4716     Collective on Mat
4717 
4718    Input Parameters:
4719 +    A,B - the matrices in mpiaij format
4720 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4721 .    startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4722 .    startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4723 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4724 
4725    Output Parameter:
4726 .    B_oth - the sequential matrix generated
4727 
4728     Level: developer
4729 
4730 @*/
4731 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4732 {
4733   VecScatter_MPI_General *gen_to,*gen_from;
4734   PetscErrorCode         ierr;
4735   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4736   Mat_SeqAIJ             *b_oth;
4737   VecScatter             ctx=a->Mvctx;
4738   MPI_Comm               comm=((PetscObject)ctx)->comm;
4739   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4740   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4741   PetscScalar            *rvalues,*svalues;
4742   MatScalar              *b_otha,*bufa,*bufA;
4743   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4744   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4745   MPI_Status             *sstatus,rstatus;
4746   PetscMPIInt            jj;
4747   PetscInt               *cols,sbs,rbs;
4748   PetscScalar            *vals;
4749 
4750   PetscFunctionBegin;
4751   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4752     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4753   }
4754   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4755   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4756 
4757   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4758   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4759   rvalues  = gen_from->values; /* reused here to hold the lengths of the rows being received */
4760   svalues  = gen_to->values;   /* reused here to hold the lengths of the rows being sent */
4761   nrecvs   = gen_from->n;
4762   nsends   = gen_to->n;
4763 
4764   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4765   srow     = gen_to->indices;   /* local row index to be sent */
4766   sstarts  = gen_to->starts;
4767   sprocs   = gen_to->procs;
4768   sstatus  = gen_to->sstatus;
4769   sbs      = gen_to->bs;
4770   rstarts  = gen_from->starts;
4771   rprocs   = gen_from->procs;
4772   rbs      = gen_from->bs;
4773 
4774   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4775   if (scall == MAT_INITIAL_MATRIX){
4776     /* i-array */
4777     /*---------*/
4778     /*  post receives */
4779     for (i=0; i<nrecvs; i++){
4780       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4781       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4782       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4783     }
4784 
4785     /* pack the outgoing message */
4786     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4787     sstartsj[0] = 0;  rstartsj[0] = 0;
4788     len = 0; /* total length of j or a array to be sent */
4789     k = 0;
4790     for (i=0; i<nsends; i++){
4791       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4792       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4793       for (j=0; j<nrows; j++) {
4794         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4795         for (l=0; l<sbs; l++){
4796           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4797           rowlen[j*sbs+l] = ncols;
4798           len += ncols;
4799           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4800         }
4801         k++;
4802       }
4803       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4804       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4805     }
4806     /* recvs and sends of i-array are completed */
4807     i = nrecvs;
4808     while (i--) {
4809       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4810     }
4811     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4812 
4813     /* allocate buffers for sending j and a arrays */
4814     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4815     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4816 
4817     /* create i-array of B_oth */
4818     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4819     b_othi[0] = 0;
4820     len = 0; /* total length of j or a array to be received */
4821     k = 0;
4822     for (i=0; i<nrecvs; i++){
4823       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4824       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4825       for (j=0; j<nrows; j++) {
4826         b_othi[k+1] = b_othi[k] + rowlen[j];
4827         len += rowlen[j]; k++;
4828       }
4829       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4830     }
4831 
4832     /* allocate space for j and a arrays of B_oth */
4833     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4834     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4835 
4836     /* j-array */
4837     /*---------*/
4838     /*  post receives of j-array */
4839     for (i=0; i<nrecvs; i++){
4840       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4841       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4842     }
4843 
4844     /* pack the outgoing message j-array */
4845     k = 0;
4846     for (i=0; i<nsends; i++){
4847       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4848       bufJ = bufj+sstartsj[i];
4849       for (j=0; j<nrows; j++) {
4850         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4851         for (ll=0; ll<sbs; ll++){
4852           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4853           for (l=0; l<ncols; l++){
4854             *bufJ++ = cols[l];
4855           }
4856           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4857         }
4858       }
4859       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4860     }
4861 
4862     /* recvs and sends of j-array are completed */
4863     i = nrecvs;
4864     while (i--) {
4865       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4866     }
4867     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4868   } else if (scall == MAT_REUSE_MATRIX){
4869     sstartsj = *startsj;
4870     rstartsj = *startsj_r;
4871     bufa     = *bufa_ptr;
4872     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4873     b_otha   = b_oth->a;
4874   } else {
4875     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4876   }
4877 
4878   /* a-array */
4879   /*---------*/
4880   /*  post receives of a-array */
4881   for (i=0; i<nrecvs; i++){
4882     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4883     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4884   }
4885 
4886   /* pack the outgoing message a-array */
4887   k = 0;
4888   for (i=0; i<nsends; i++){
4889     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4890     bufA = bufa+sstartsj[i];
4891     for (j=0; j<nrows; j++) {
4892       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4893       for (ll=0; ll<sbs; ll++){
4894         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4895         for (l=0; l<ncols; l++){
4896           *bufA++ = vals[l];
4897         }
4898         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4899       }
4900     }
4901     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4902   }
4903   /* recvs and sends of a-array are completed */
4904   i = nrecvs;
4905   while (i--) {
4906     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4907   }
4908   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4909   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4910 
4911   if (scall == MAT_INITIAL_MATRIX){
4912     /* put together the new matrix */
4913     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4914 
4915     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4916     /* Since these are PETSc arrays, change flags to free them as necessary. */
4917     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4918     b_oth->free_a  = PETSC_TRUE;
4919     b_oth->free_ij = PETSC_TRUE;
4920     b_oth->nonew   = 0;
4921 
4922     ierr = PetscFree(bufj);CHKERRQ(ierr);
4923     if (!startsj || !bufa_ptr){
4924       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4925       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4926     } else {
4927       *startsj   = sstartsj;
4928       *startsj_r = rstartsj;
4929       *bufa_ptr  = bufa;
4930     }
4931   }
4932   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4933   PetscFunctionReturn(0);
4934 }
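/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative only; A and B are hypothetical MPIAIJ
   matrices with compatible layouts). The communication buffers created on the first call are
   saved and handed back in on subsequent calls:

     Mat            Both;
     PetscInt       *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar      *bufa = PETSC_NULL;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
*/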
4935 
4936 #undef __FUNCT__
4937 #define __FUNCT__ "MatGetCommunicationStructs"
4938 /*@C
4939   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4940 
4941   Not Collective
4942 
4943   Input Parameter:
4944 . A - The matrix in mpiaij format
4945 
4946   Output Parameters:
4947 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4948 . colmap - A map from global column index to local index into lvec
4949 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4950 
4951   Level: developer
4952 
4953 @*/
4954 #if defined (PETSC_USE_CTABLE)
4955 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4956 #else
4957 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4958 #endif
4959 {
4960   Mat_MPIAIJ *a;
4961 
4962   PetscFunctionBegin;
4963   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4964   PetscValidPointer(lvec, 2);
4965   PetscValidPointer(colmap, 3);
4966   PetscValidPointer(multScatter, 4);
4967   a = (Mat_MPIAIJ *) A->data;
4968   if (lvec) *lvec = a->lvec;
4969   if (colmap) *colmap = a->colmap;
4970   if (multScatter) *multScatter = a->Mvctx;
4971   PetscFunctionReturn(0);
4972 }
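/*
   Usage sketch for MatGetCommunicationStructs() (illustrative only; A is a hypothetical MPIAIJ
   matrix and PETSC_USE_CTABLE is assumed NOT to be defined, so colmap is a PetscInt array):

     Vec            lvec;
     PetscInt       *colmap;
     VecScatter     Mvctx;
     PetscErrorCode ierr;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ... the returned objects are internal to A; do not destroy them ...
*/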
4973 
4974 EXTERN_C_BEGIN
4975 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4976 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4977 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4978 EXTERN_C_END
4979 
4980 #undef __FUNCT__
4981 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4982 /*
4983     Computes C = (B'*A')' = A*B since computing A*B directly is untenable
4984 
4985                n                       p                          p
4986         (              )       (              )         (                  )
4987       m (      A       )  *  n (       B      )   =   m (         C        )
4988         (              )       (              )         (                  )
4989 
4990 */
4991 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4992 {
4993   PetscErrorCode     ierr;
4994   Mat                At,Bt,Ct;
4995 
4996   PetscFunctionBegin;
4997   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4998   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4999   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5000   ierr = MatDestroy(At);CHKERRQ(ierr);
5001   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5002   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5003   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
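/*
   Note added for clarity (a short derivation, using the dimensions pictured above):
   Ct = Bt*At = (B^T)(A^T) = (A*B)^T, so transposing Ct into C gives C = (Ct)^T = A*B,
   which is exactly the m by p product this routine is asked to form.
*/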
5006 
5007 #undef __FUNCT__
5008 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5009 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5010 {
5011   PetscErrorCode ierr;
5012   PetscInt       m=A->rmap->n,n=B->cmap->n;
5013   Mat            Cmat;
5014 
5015   PetscFunctionBegin;
5016   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5017   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5018   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5019   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5020   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5021   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5022   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5023   *C   = Cmat;
5024   PetscFunctionReturn(0);
5025 }
5026 
5027 /* ----------------------------------------------------------------*/
5028 #undef __FUNCT__
5029 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5030 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5031 {
5032   PetscErrorCode ierr;
5033 
5034   PetscFunctionBegin;
5035   if (scall == MAT_INITIAL_MATRIX){
5036     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5037   }
5038   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5039   PetscFunctionReturn(0);
5040 }
5041 
5042 EXTERN_C_BEGIN
5043 #if defined(PETSC_HAVE_MUMPS)
5044 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5045 #endif
5046 #if defined(PETSC_HAVE_PASTIX)
5047 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5048 #endif
5049 #if defined(PETSC_HAVE_SUPERLU_DIST)
5050 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5051 #endif
5052 #if defined(PETSC_HAVE_SPOOLES)
5053 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5054 #endif
5055 EXTERN_C_END
5056 
5057 /*MC
5058    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5059 
5060    Options Database Keys:
5061 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5062 
5063   Level: beginner
5064 
5065 .seealso: MatCreateMPIAIJ()
5066 M*/
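/*
   Typical use of the option listed above (illustrative only; "ex1" is a hypothetical PETSc
   program that calls MatSetFromOptions() on its matrix):

     mpiexec -n 4 ./ex1 -mat_type mpiaij
*/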
5067 
5068 EXTERN_C_BEGIN
5069 #undef __FUNCT__
5070 #define __FUNCT__ "MatCreate_MPIAIJ"
5071 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5072 {
5073   Mat_MPIAIJ     *b;
5074   PetscErrorCode ierr;
5075   PetscMPIInt    size;
5076 
5077   PetscFunctionBegin;
5078   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5079 
5080   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5081   B->data         = (void*)b;
5082   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5083   B->rmap->bs     = 1;
5084   B->assembled    = PETSC_FALSE;
5085   B->mapping      = 0;
5086 
5087   B->insertmode   = NOT_SET_VALUES;
5088   b->size         = size;
5089   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5090 
5091   /* build cache for off array entries formed */
5092   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5093   b->donotstash  = PETSC_FALSE;
5094   b->colmap      = 0;
5095   b->garray      = 0;
5096   b->roworiented = PETSC_TRUE;
5097 
5098   /* stuff used for matrix vector multiply */
5099   b->lvec      = PETSC_NULL;
5100   b->Mvctx     = PETSC_NULL;
5101 
5102   /* stuff for MatGetRow() */
5103   b->rowindices   = 0;
5104   b->rowvalues    = 0;
5105   b->getrowactive = PETSC_FALSE;
5106 
5107 #if defined(PETSC_HAVE_SPOOLES)
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5109                                      "MatGetFactor_mpiaij_spooles",
5110                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5111 #endif
5112 #if defined(PETSC_HAVE_MUMPS)
5113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5114                                      "MatGetFactor_aij_mumps",
5115                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5116 #endif
5117 #if defined(PETSC_HAVE_PASTIX)
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5119 					   "MatGetFactor_mpiaij_pastix",
5120 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5121 #endif
5122 #if defined(PETSC_HAVE_SUPERLU_DIST)
5123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5124                                      "MatGetFactor_mpiaij_superlu_dist",
5125                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5126 #endif
5127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5128                                      "MatStoreValues_MPIAIJ",
5129                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5131                                      "MatRetrieveValues_MPIAIJ",
5132                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5134 				     "MatGetDiagonalBlock_MPIAIJ",
5135                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5136   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5137 				     "MatIsTranspose_MPIAIJ",
5138 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5139   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5140 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5141 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5142   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5143 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5144 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5145   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5146 				     "MatDiagonalScaleLocal_MPIAIJ",
5147 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5148   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5149                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5150                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5151   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5152                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5153                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5155                                      "MatConvert_MPIAIJ_MPISBAIJ",
5156                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5158                                      "MatMatMult_MPIDense_MPIAIJ",
5159                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5161                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5162                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5163   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5164                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5165                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5166   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5167   PetscFunctionReturn(0);
5168 }
5169 EXTERN_C_END
5170 
5171 #undef __FUNCT__
5172 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5173 /*@
5174      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5175          and "off-diagonal" parts of the matrix in CSR format.
5176 
5177    Collective on MPI_Comm
5178 
5179    Input Parameters:
5180 +  comm - MPI communicator
5181 .  m - number of local rows (Cannot be PETSC_DECIDE)
5182 .  n - This value should be the same as the local size used in creating the
5183        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5184        it calculated if N is given). For square matrices n is almost always m.
5185 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5186 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5187 .   i - row indices for "diagonal" portion of matrix
5188 .   j - column indices for "diagonal" portion of matrix
5189 .   a - matrix values for "diagonal" portion of matrix
5190 .   oi - row indices for "off-diagonal" portion of matrix
5191 .   oj - column indices for "off-diagonal" portion of matrix
5192 -   oa - matrix values for "off-diagonal" portion of matrix
5193 
5194    Output Parameter:
5195 .   mat - the matrix
5196 
5197    Level: advanced
5198 
5199    Notes:
5200        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc.
5201 
5202        The i and j indices are 0 based
5203 
5204        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5205 
5206        This sets local rows and cannot be used to set off-processor values.
5207 
5208        You cannot later use MatSetValues() to change values in this matrix.
5209 
5210 .keywords: matrix, aij, compressed row, sparse, parallel
5211 
5212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5213           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5214 @*/
5215 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5216 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5217 {
5218   PetscErrorCode ierr;
5219   Mat_MPIAIJ     *maij;
5220 
5221   PetscFunctionBegin;
5222   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5223   if (i[0]) {
5224     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5225   }
5226   if (oi[0]) {
5227     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5228   }
5229   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5230   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5231   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5232   maij = (Mat_MPIAIJ*) (*mat)->data;
5233   maij->donotstash     = PETSC_TRUE;
5234   (*mat)->preallocated = PETSC_TRUE;
5235 
5236   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5237   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5238   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5239   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5240 
5241   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5242   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5243 
5244   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5245   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5246   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5247   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5248 
5249   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5250   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5251   PetscFunctionReturn(0);
5252 }
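/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only; the CSR arrays i,j,a and
   oi,oj,oa are hypothetical and must already describe the "diagonal" and "off-diagonal" blocks of
   this process's rows in the format discussed in the Notes above):

     Mat            M;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&M);CHKERRQ(ierr);
     ... the arrays are NOT copied, so they must remain valid for the lifetime of M ...
*/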
5253 
5254 /*
5255     Special version for direct calls from Fortran
5256 */
5257 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5258 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5259 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5260 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5261 #endif
5262 
5263 /* Change these macros so they can be used in a void function */
5264 #undef CHKERRQ
5265 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5266 #undef SETERRQ2
5267 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5268 #undef SETERRQ
5269 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5270 
5271 EXTERN_C_BEGIN
5272 #undef __FUNCT__
5273 #define __FUNCT__ "matsetvaluesmpiaij_"
5274 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5275 {
5276   Mat             mat = *mmat;
5277   PetscInt        m = *mm, n = *mn;
5278   InsertMode      addv = *maddv;
5279   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5280   PetscScalar     value;
5281   PetscErrorCode  ierr;
5282 
5283   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5284   if (mat->insertmode == NOT_SET_VALUES) {
5285     mat->insertmode = addv;
5286   }
5287 #if defined(PETSC_USE_DEBUG)
5288   else if (mat->insertmode != addv) {
5289     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5290   }
5291 #endif
5292   {
5293   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5294   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5295   PetscTruth      roworiented = aij->roworiented;
5296 
5297   /* Some Variables required in the macro */
5298   Mat             A = aij->A;
5299   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5300   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5301   MatScalar       *aa = a->a;
5302   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5303   Mat             B = aij->B;
5304   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5305   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5306   MatScalar       *ba = b->a;
5307 
5308   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5309   PetscInt        nonew = a->nonew;
5310   MatScalar       *ap1,*ap2;
5311 
5312   PetscFunctionBegin;
5313   for (i=0; i<m; i++) {
5314     if (im[i] < 0) continue;
5315 #if defined(PETSC_USE_DEBUG)
5316     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5317 #endif
5318     if (im[i] >= rstart && im[i] < rend) {
5319       row      = im[i] - rstart;
5320       lastcol1 = -1;
5321       rp1      = aj + ai[row];
5322       ap1      = aa + ai[row];
5323       rmax1    = aimax[row];
5324       nrow1    = ailen[row];
5325       low1     = 0;
5326       high1    = nrow1;
5327       lastcol2 = -1;
5328       rp2      = bj + bi[row];
5329       ap2      = ba + bi[row];
5330       rmax2    = bimax[row];
5331       nrow2    = bilen[row];
5332       low2     = 0;
5333       high2    = nrow2;
5334 
5335       for (j=0; j<n; j++) {
5336         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5337         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5338         if (in[j] >= cstart && in[j] < cend){
5339           col = in[j] - cstart;
5340           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5341         } else if (in[j] < 0) continue;
5342 #if defined(PETSC_USE_DEBUG)
5343         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5344 #endif
5345         else {
5346           if (mat->was_assembled) {
5347             if (!aij->colmap) {
5348               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5349             }
5350 #if defined (PETSC_USE_CTABLE)
5351             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5352 	    col--;
5353 #else
5354             col = aij->colmap[in[j]] - 1;
5355 #endif
5356             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5357               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5358               col =  in[j];
5359               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5360               B = aij->B;
5361               b = (Mat_SeqAIJ*)B->data;
5362               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5363               rp2      = bj + bi[row];
5364               ap2      = ba + bi[row];
5365               rmax2    = bimax[row];
5366               nrow2    = bilen[row];
5367               low2     = 0;
5368               high2    = nrow2;
5369               bm       = aij->B->rmap->n;
5370               ba = b->a;
5371             }
5372           } else col = in[j];
5373           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5374         }
5375       }
5376     } else {
5377       if (!aij->donotstash) {
5378         if (roworiented) {
5379           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5380         } else {
5381           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5382         }
5383       }
5384     }
5385   }}
5386   PetscFunctionReturnVoid();
5387 }
5388 EXTERN_C_END
5389 
5390